import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import researchpy as rp
df = pd.read_csv('dataset.csv')
df.head()
| loan_id | no_of_dependents | education | self_employed | income_annum | loan_amount | loan_term | cibil_score | residential_assets_value | commercial_assets_value | luxury_assets_value | bank_asset_value | loan_status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2 | Graduate | No | 9600000 | 29900000 | 12 | 778 | 2400000 | 17600000 | 22700000 | 8000000 | Approved |
| 1 | 2 | 0 | Not Graduate | Yes | 4100000 | 12200000 | 8 | 417 | 2700000 | 2200000 | 8800000 | 3300000 | Rejected |
| 2 | 3 | 3 | Graduate | No | 9100000 | 29700000 | 20 | 506 | 7100000 | 4500000 | 33300000 | 12800000 | Rejected |
| 3 | 4 | 3 | Graduate | No | 8200000 | 30700000 | 8 | 467 | 18200000 | 3300000 | 23300000 | 7900000 | Rejected |
| 4 | 5 | 5 | Not Graduate | Yes | 9800000 | 24200000 | 20 | 382 | 12400000 | 8200000 | 29400000 | 5000000 | Rejected |
df.tail()
| loan_id | no_of_dependents | education | self_employed | income_annum | loan_amount | loan_term | cibil_score | residential_assets_value | commercial_assets_value | luxury_assets_value | bank_asset_value | loan_status | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4264 | 4265 | 5 | Graduate | Yes | 1000000 | 2300000 | 12 | 317 | 2800000 | 500000 | 3300000 | 800000 | Rejected |
| 4265 | 4266 | 0 | Not Graduate | Yes | 3300000 | 11300000 | 20 | 559 | 4200000 | 2900000 | 11000000 | 1900000 | Approved |
| 4266 | 4267 | 2 | Not Graduate | No | 6500000 | 23900000 | 18 | 457 | 1200000 | 12400000 | 18100000 | 7300000 | Rejected |
| 4267 | 4268 | 1 | Not Graduate | No | 4100000 | 12800000 | 8 | 780 | 8200000 | 700000 | 14100000 | 5800000 | Approved |
| 4268 | 4269 | 1 | Graduate | No | 9200000 | 29700000 | 10 | 607 | 17800000 | 11800000 | 35700000 | 12000000 | Approved |
df.shape
(4269, 13)
df.describe()
| loan_id | no_of_dependents | income_annum | loan_amount | loan_term | cibil_score | residential_assets_value | commercial_assets_value | luxury_assets_value | bank_asset_value | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 4269.000000 | 4269.000000 | 4.269000e+03 | 4.269000e+03 | 4269.000000 | 4269.000000 | 4.269000e+03 | 4.269000e+03 | 4.269000e+03 | 4.269000e+03 |
| mean | 2135.000000 | 2.498712 | 5.059124e+06 | 1.513345e+07 | 10.900445 | 599.936051 | 7.472617e+06 | 4.973155e+06 | 1.512631e+07 | 4.976692e+06 |
| std | 1232.498479 | 1.695910 | 2.806840e+06 | 9.043363e+06 | 5.709187 | 172.430401 | 6.503637e+06 | 4.388966e+06 | 9.103754e+06 | 3.250185e+06 |
| min | 1.000000 | 0.000000 | 2.000000e+05 | 3.000000e+05 | 2.000000 | 300.000000 | -1.000000e+05 | 0.000000e+00 | 3.000000e+05 | 0.000000e+00 |
| 25% | 1068.000000 | 1.000000 | 2.700000e+06 | 7.700000e+06 | 6.000000 | 453.000000 | 2.200000e+06 | 1.300000e+06 | 7.500000e+06 | 2.300000e+06 |
| 50% | 2135.000000 | 3.000000 | 5.100000e+06 | 1.450000e+07 | 10.000000 | 600.000000 | 5.600000e+06 | 3.700000e+06 | 1.460000e+07 | 4.600000e+06 |
| 75% | 3202.000000 | 4.000000 | 7.500000e+06 | 2.150000e+07 | 16.000000 | 748.000000 | 1.130000e+07 | 7.600000e+06 | 2.170000e+07 | 7.100000e+06 |
| max | 4269.000000 | 5.000000 | 9.900000e+06 | 3.950000e+07 | 20.000000 | 900.000000 | 2.910000e+07 | 1.940000e+07 | 3.920000e+07 | 1.470000e+07 |
df.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| loan_id | 4269.0 | 2.135000e+03 | 1.232498e+03 | 1.0 | 1068.0 | 2135.0 | 3202.0 | 4269.0 |
| no_of_dependents | 4269.0 | 2.498712e+00 | 1.695910e+00 | 0.0 | 1.0 | 3.0 | 4.0 | 5.0 |
| income_annum | 4269.0 | 5.059124e+06 | 2.806840e+06 | 200000.0 | 2700000.0 | 5100000.0 | 7500000.0 | 9900000.0 |
| loan_amount | 4269.0 | 1.513345e+07 | 9.043363e+06 | 300000.0 | 7700000.0 | 14500000.0 | 21500000.0 | 39500000.0 |
| loan_term | 4269.0 | 1.090045e+01 | 5.709187e+00 | 2.0 | 6.0 | 10.0 | 16.0 | 20.0 |
| cibil_score | 4269.0 | 5.999361e+02 | 1.724304e+02 | 300.0 | 453.0 | 600.0 | 748.0 | 900.0 |
| residential_assets_value | 4269.0 | 7.472617e+06 | 6.503637e+06 | -100000.0 | 2200000.0 | 5600000.0 | 11300000.0 | 29100000.0 |
| commercial_assets_value | 4269.0 | 4.973155e+06 | 4.388966e+06 | 0.0 | 1300000.0 | 3700000.0 | 7600000.0 | 19400000.0 |
| luxury_assets_value | 4269.0 | 1.512631e+07 | 9.103754e+06 | 300000.0 | 7500000.0 | 14600000.0 | 21700000.0 | 39200000.0 |
| bank_asset_value | 4269.0 | 4.976692e+06 | 3.250185e+06 | 0.0 | 2300000.0 | 4600000.0 | 7100000.0 | 14700000.0 |
df.isnull().sum()
loan_id 0 no_of_dependents 0 education 0 self_employed 0 income_annum 0 loan_amount 0 loan_term 0 cibil_score 0 residential_assets_value 0 commercial_assets_value 0 luxury_assets_value 0 bank_asset_value 0 loan_status 0 dtype: int64
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4269 entries, 0 to 4268 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 loan_id 4269 non-null int64 1 no_of_dependents 4269 non-null int64 2 education 4269 non-null object 3 self_employed 4269 non-null object 4 income_annum 4269 non-null int64 5 loan_amount 4269 non-null int64 6 loan_term 4269 non-null int64 7 cibil_score 4269 non-null int64 8 residential_assets_value 4269 non-null int64 9 commercial_assets_value 4269 non-null int64 10 luxury_assets_value 4269 non-null int64 11 bank_asset_value 4269 non-null int64 12 loan_status 4269 non-null object dtypes: int64(10), object(3) memory usage: 433.7+ KB
# object türündeki sütunları category türüne çevirelim
object_columns = df.select_dtypes(include='object').columns
df[object_columns] = df[object_columns].astype('category')
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4269 entries, 0 to 4268 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 loan_id 4269 non-null int64 1 no_of_dependents 4269 non-null int64 2 education 4269 non-null category 3 self_employed 4269 non-null category 4 income_annum 4269 non-null int64 5 loan_amount 4269 non-null int64 6 loan_term 4269 non-null int64 7 cibil_score 4269 non-null int64 8 residential_assets_value 4269 non-null int64 9 commercial_assets_value 4269 non-null int64 10 luxury_assets_value 4269 non-null int64 11 bank_asset_value 4269 non-null int64 12 loan_status 4269 non-null category dtypes: category(3), int64(10) memory usage: 346.5 KB
for column in df.columns:
unique_values = df[column].unique()
print(f"Unique values in column '{column}':")
print(unique_values)
print()
Unique values in column 'loan_id': [ 1 2 3 ... 4267 4268 4269] Unique values in column ' no_of_dependents': [2 0 3 5 4 1] Unique values in column ' education': [' Graduate', ' Not Graduate'] Categories (2, object): [' Graduate', ' Not Graduate'] Unique values in column ' self_employed': [' No', ' Yes'] Categories (2, object): [' No', ' Yes'] Unique values in column ' income_annum': [9600000 4100000 9100000 8200000 9800000 4800000 8700000 5700000 800000 1100000 2900000 6700000 5000000 1900000 4700000 500000 2700000 6300000 5800000 6500000 4900000 3100000 2400000 7000000 9000000 8400000 1700000 1600000 8000000 3600000 1500000 7800000 1400000 4200000 5500000 9500000 7300000 3800000 5100000 4300000 9300000 7400000 8500000 8800000 3300000 3900000 8300000 5600000 5300000 2600000 700000 3500000 9900000 3000000 6800000 2000000 1000000 300000 6600000 9400000 4400000 400000 6200000 9700000 7100000 600000 7200000 900000 200000 1800000 4600000 2200000 2500000 8600000 4000000 5200000 8900000 1300000 4500000 8100000 9200000 2800000 7500000 6400000 6900000 7700000 3200000 7900000 5900000 3400000 2100000 3700000 5400000 2300000 7600000 6000000 6100000 1200000] Unique values in column ' loan_amount': [29900000 12200000 29700000 30700000 24200000 13500000 33000000 15000000 2200000 4300000 11200000 22700000 11600000 31500000 7400000 10700000 1600000 9400000 10300000 14600000 19400000 14000000 25700000 1400000 9800000 9500000 28100000 5600000 24000000 25300000 12000000 22000000 11900000 3400000 6200000 27200000 7700000 5100000 18100000 24900000 2300000 13400000 27800000 19100000 20500000 25400000 24700000 7600000 23000000 19700000 24500000 10600000 30500000 18400000 18200000 18900000 28900000 7500000 12300000 29100000 10100000 12400000 5000000 1500000 18600000 18300000 16700000 8400000 6500000 14800000 33500000 29400000 8900000 31200000 21200000 8600000 8200000 3800000 28300000 8000000 37600000 21100000 20700000 6400000 2000000 1100000 25000000 10800000 900000 12900000 4500000 23600000 9700000 35900000 6800000 22100000 23400000 23200000 15800000 32900000 3200000 18700000 19500000 600000 800000 2600000 1200000 20800000 22600000 3600000 13900000 5500000 6700000 8500000 700000 17400000 32100000 11100000 19300000 28800000 20600000 35000000 33300000 1300000 9600000 15100000 5300000 22300000 15900000 12800000 35200000 17500000 10500000 4100000 28200000 14300000 13300000 17900000 9900000 23100000 3100000 10900000 30400000 23300000 19800000 2900000 13200000 27100000 6000000 16400000 15600000 30100000 20900000 15400000 3300000 32700000 15200000 7800000 17000000 11300000 10400000 11000000 1700000 27000000 3500000 32400000 34600000 15500000 22500000 16200000 29300000 9100000 30900000 4700000 2400000 35400000 20000000 38800000 8100000 19600000 34300000 22200000 14400000 16800000 27900000 20400000 4900000 4000000 19900000 1800000 11800000 25500000 9300000 20200000 1000000 38200000 6600000 33200000 24400000 14100000 28700000 23500000 17100000 5700000 7000000 16900000 21000000 12600000 28000000 17200000 24100000 26300000 38400000 32200000 26900000 21900000 12500000 29800000 17600000 29000000 35300000 14500000 22400000 8700000 6300000 5200000 2700000 25900000 21500000 15700000 7300000 7200000 18800000 2100000 4600000 5400000 15300000 2800000 36400000 27500000 11400000 18500000 26700000 5800000 21400000 2500000 14700000 17300000 28600000 31900000 8800000 30300000 24800000 19200000 39500000 9000000 19000000 23900000 31800000 10000000 14200000 22800000 32300000 500000 6100000 5900000 24600000 16000000 12700000 13100000 6900000 27700000 29200000 13800000 18000000 27400000 17800000 20100000 32600000 4800000 11500000 22900000 1900000 300000 16600000 28500000 25100000 25800000 21800000 30000000 10200000 31700000 26000000 8300000 35500000 33900000 17700000 4200000 9200000 20300000 400000 34700000 26500000 16500000 14900000 37000000 27300000 26200000 25600000 16300000 24300000 21600000 11700000 34200000 34500000 13000000 23700000 30800000 3900000 13600000 38700000 26600000 37900000 21700000 29600000 23800000 34000000 25200000 35700000 26100000 16100000 13700000 38000000 37500000 7900000 34400000 37300000 21300000 28400000 35800000 38500000 34900000 33600000 36800000 31400000 3000000 4400000 26400000 37800000 7100000 34100000 30200000 32000000 31300000 12100000 36700000 30600000 3700000 31600000 29500000 31000000 34800000 36500000 36000000 36300000 31100000 26800000 35100000 32800000 33100000 32500000 33400000 27600000 33700000 36600000 33800000 37700000 36100000] Unique values in column ' loan_term': [12 8 20 10 4 2 18 16 14 6] Unique values in column ' cibil_score': [778 417 506 467 382 319 678 782 388 547 538 311 679 469 794 663 780 736 652 315 530 551 324 514 696 662 336 850 313 363 436 830 612 691 636 348 352 712 822 540 342 787 331 677 634 502 435 689 657 590 818 431 841 421 797 478 669 365 586 784 364 715 693 777 312 340 386 418 735 494 671 697 801 576 639 470 826 613 713 439 387 402 837 641 489 844 452 366 300 861 562 463 702 618 633 764 591 719 317 302 879 437 456 647 379 717 545 570 865 821 859 395 429 565 357 465 479 425 786 564 501 727 894 829 802 543 772 572 709 481 306 415 548 701 890 704 318 761 524 681 737 638 656 341 371 886 748 376 873 309 869 534 566 742 824 575 766 888 622 458 327 682 583 816 455 355 389 870 827 768 707 665 420 471 819 809 744 484 673 695 473 491 733 434 774 503 598 796 632 770 667 585 851 378 807 831 674 725 600 536 477 560 539 852 853 729 546 789 325 716 523 345 649 666 813 599 513 483 308 651 433 403 405 516 468 672 549 450 320 476 573 877 531 474 499 726 485 708 404 512 441 555 466 427 593 731 451 628 424 381 449 445 781 721 563 419 372 885 596 349 685 377 620 611 767 592 900 814 755 584 380 655 833 658 648 730 621 610 339 650 367 847 360 880 608 760 385 710 711 855 771 338 769 629 699 391 891 775 897 839 868 353 792 635 457 350 874 411 482 396 303 728 698 490 504 790 860 492 834 443 329 739 867 307 375 601 756 838 442 808 597 373 552 607 823 328 580 559 587 817 765 383 843 783 409 625 645 887 791 686 722 407 895 453 627 889 684 578 369 557 519 741 508 493 664 362 703 758 828 528 623 579 846 589 845 401 522 588 863 798 668 881 406 799 743 734 812 459 448 517 426 785 472 683 803 361 464 747 335 394 848 788 509 899 595 322 631 330 567 323 670 609 354 746 857 556 393 688 384 414 815 854 849 346 856 440 616 461 866 820 544 561 614 351 399 344 301 763 624 644 642 423 724 706 811 326 488 475 337 511 810 428 356 594 480 757 321 368 806 832 571 527 333 532 754 835 553 400 558 515 740 447 745 495 660 883 795 762 462 779 752 305 310 525 661 884 800 568 842 653 617 460 804 358 836 554 840 430 347 550 878 603 444 875 343 714 529 446 705 898 487 615 676 605 569 410 753 454 619 858 392 637 359 723 304 690 862 496 659 542 749 694 574 692 521 541 640 630 535 422 606 370 700 751 896 577 537 316 412 793 390 397 876 498 872 871 497 759 413 602 720 505 582 416 510 500 626 654 892 680 750 314 520 776 825 646 518 805 332 882 604 507 408 374 687 533 581 675 773 718 432 526 398 643 893 438 486 732 334 738 864] Unique values in column ' residential_assets_value': [ 2400000 2700000 7100000 18200000 12400000 6800000 22500000 13200000 1300000 3200000 8100000 15300000 6400000 10800000 1900000 5700000 2900000 1000000 10300000 9500000 3800000 13100000 900000 7900000 11500000 4500000 2300000 21800000 20200000 3600000 700000 9700000 3400000 7000000 100000 8600000 22300000 200000 2200000 13000000 5400000 800000 500000 8700000 15400000 7400000 1200000 2100000 19300000 18500000 -100000 23800000 4700000 24400000 1600000 7600000 6100000 5500000 4000000 18400000 3900000 6500000 600000 14300000 11600000 17600000 25500000 9400000 5300000 17100000 20400000 5100000 24100000 19200000 9100000 14700000 25900000 300000 11400000 7800000 19600000 5600000 7300000 19500000 16100000 1500000 12700000 26800000 12200000 400000 15100000 11700000 4400000 6600000 1100000 2600000 14600000 13600000 15900000 0 5800000 3700000 24200000 4900000 2500000 7700000 21900000 3300000 9800000 12100000 3000000 16800000 12600000 1700000 8800000 13700000 10000000 6300000 15200000 22000000 8300000 11300000 14400000 11100000 3100000 15500000 3500000 13800000 9000000 14100000 14800000 8500000 18700000 2800000 9200000 20000000 4100000 22800000 16500000 6000000 23200000 5000000 25600000 24500000 13400000 14000000 16000000 18100000 8000000 9900000 17200000 1800000 1400000 10400000 4200000 6900000 16600000 9600000 17400000 8400000 11900000 10500000 5900000 7200000 14200000 22900000 4300000 16900000 6200000 12500000 15700000 9300000 18000000 8200000 10700000 4800000 10200000 21500000 12900000 4600000 15600000 10600000 5200000 21700000 11000000 23300000 20800000 23000000 11800000 21100000 10900000 2000000 15800000 23400000 13500000 23900000 17300000 18300000 19400000 22100000 12000000 24000000 6700000 13900000 20600000 25400000 7500000 10100000 17700000 28300000 11200000 18800000 14500000 24900000 26300000 13300000 22400000 27600000 21400000 28700000 25300000 25800000 18600000 19100000 22200000 28200000 19700000 25200000 24700000 16700000 17000000 16300000 15000000 21300000 12800000 20300000 12300000 19900000 16200000 19000000 16400000 8900000 22700000 25700000 21200000 27000000 21600000 17800000 28500000 14900000 17900000 28400000 23700000 20500000 24600000 20100000 22600000 20900000 21000000 26600000 26200000 19800000 17500000 28000000 24800000 26900000 26100000 20700000 29100000 18900000 25100000 23500000 24300000 27500000 25000000 23100000 27400000 27300000] Unique values in column ' commercial_assets_value': [17600000 2200000 4500000 3300000 8200000 8300000 14800000 5700000 800000 1400000 4700000 5800000 9600000 16600000 1200000 3900000 100000 2800000 0 3500000 1600000 11300000 1700000 600000 8700000 3100000 10600000 4200000 11900000 12400000 5200000 7400000 200000 700000 300000 1300000 11200000 12100000 1500000 6300000 6900000 9100000 8600000 10500000 1800000 9300000 5600000 10300000 4900000 16300000 1900000 6100000 9700000 11700000 9400000 3800000 2500000 7800000 8900000 500000 11400000 13600000 2600000 4300000 3200000 1100000 400000 4800000 8500000 15200000 3600000 16500000 2700000 7600000 6000000 12200000 2000000 1000000 6200000 8000000 5900000 4100000 6500000 10000000 16700000 900000 2100000 9500000 5500000 4400000 18700000 5100000 11100000 12600000 5000000 6800000 2400000 7500000 2900000 10900000 11000000 11600000 2300000 3400000 11500000 8100000 5300000 6700000 10200000 10800000 4000000 4600000 7000000 6600000 17500000 16200000 12300000 12800000 13200000 16400000 19000000 16100000 8800000 3700000 5400000 8400000 12000000 15000000 9200000 17200000 11800000 14900000 13800000 7900000 10400000 18500000 12500000 13400000 9900000 12700000 15400000 14700000 15600000 14000000 16000000 13000000 14300000 9800000 18800000 13900000 7200000 7100000 15100000 15500000 13300000 3000000 13700000 7300000 17800000 6400000 17900000 12900000 14600000 10100000 18300000 9000000 14500000 14200000 17300000 13100000 10700000 16800000 18900000 18400000 18200000 14100000 14400000 7700000 17000000 15900000 15300000 19400000 16900000 13500000 17400000 15700000 15800000 17700000] Unique values in column ' luxury_assets_value': [22700000 8800000 33300000 23300000 29400000 13700000 29200000 11800000 2800000 3300000 9500000 20400000 14600000 20900000 5900000 16400000 1300000 6700000 6200000 23500000 18000000 22200000 19500000 1100000 10000000 6600000 25300000 5400000 27500000 33700000 25500000 21700000 2200000 19900000 19000000 6000000 5300000 16700000 5600000 31000000 3900000 1800000 16200000 21400000 8700000 17700000 18500000 37700000 20500000 21800000 9300000 31900000 19400000 16300000 34600000 17500000 18600000 25900000 26500000 27400000 10500000 13100000 14900000 24100000 4900000 1900000 11900000 21500000 12600000 4800000 12900000 35400000 25200000 2400000 12300000 26600000 10300000 11000000 3800000 27900000 23400000 12500000 22400000 3200000 700000 18200000 23200000 36400000 13800000 1200000 500000 11400000 4100000 23800000 20800000 9900000 11700000 900000 17900000 19300000 33400000 7700000 22600000 1500000 23600000 2700000 2000000 800000 15500000 33900000 25700000 4400000 13900000 8600000 7500000 7400000 12800000 24500000 2100000 5100000 7200000 18800000 18100000 2900000 36100000 14000000 8400000 27800000 8000000 10400000 17800000 4300000 27000000 16000000 5000000 23100000 18700000 9700000 17400000 6500000 33800000 5700000 20000000 8200000 14300000 26300000 26900000 26400000 27700000 24000000 22500000 28000000 31800000 12200000 38200000 38600000 19600000 21900000 3500000 27200000 3700000 15000000 34700000 23700000 8500000 10600000 16100000 21200000 13600000 7000000 18400000 7100000 14700000 9600000 11200000 24300000 20300000 6400000 23000000 25000000 6300000 22800000 31600000 29700000 29100000 30800000 13000000 26000000 14500000 16800000 29500000 28200000 19100000 26700000 5200000 4600000 7900000 16900000 9800000 15600000 30500000 30200000 12400000 3000000 20100000 8900000 19800000 35500000 28500000 25400000 16500000 17200000 4700000 28800000 3600000 400000 14200000 14800000 14100000 22900000 26100000 36500000 28600000 34500000 30900000 6900000 2300000 25100000 28900000 14400000 29900000 7600000 37000000 15800000 1400000 33500000 13500000 300000 13200000 1600000 8100000 5800000 10900000 11300000 10200000 13300000 34900000 17300000 22000000 32100000 20700000 26800000 27100000 10100000 21000000 19700000 600000 7300000 32000000 22100000 2600000 9100000 31100000 32700000 32800000 24900000 5500000 32600000 3400000 9000000 12700000 6800000 17100000 20200000 10800000 34100000 26200000 29000000 11600000 31300000 28400000 11100000 12000000 12100000 17000000 15100000 28300000 16600000 15300000 18900000 23900000 24400000 17600000 11500000 21100000 30000000 29600000 15200000 27600000 20600000 30400000 9400000 7800000 18300000 4200000 8300000 30100000 25800000 1700000 21600000 29300000 35700000 4500000 30300000 10700000 24800000 31500000 24700000 19200000 13400000 35100000 35600000 15900000 33000000 31700000 9200000 6100000 15400000 2500000 24600000 35800000 22300000 34300000 36600000 3100000 28700000 36900000 28100000 32500000 38100000 39200000 15700000 37800000 27300000 31200000 39100000 21300000 24200000 37200000 37900000 25600000 33600000 30600000 32900000 37400000 34000000 1000000 37600000 35900000 32300000 32400000 31400000 30700000 34400000 4000000 32200000 29800000 33200000 34800000 36000000 36200000 36800000 35300000 38000000 33100000 37300000 34200000 35000000 36700000] Unique values in column ' bank_asset_value': [ 8000000 3300000 12800000 7900000 5000000 5100000 4300000 6000000 600000 1600000 3100000 6400000 1900000 4400000 700000 5900000 6100000 5400000 8500000 300000 2600000 7200000 2500000 9700000 9300000 1000000 5800000 900000 1400000 7100000 2900000 9000000 5200000 800000 10900000 4900000 6500000 8200000 11700000 10500000 11300000 3400000 6200000 8700000 4100000 4800000 11400000 4700000 2800000 11900000 5500000 2400000 4200000 7600000 5600000 2000000 1100000 6300000 11100000 8600000 6800000 3600000 10200000 12700000 2100000 1300000 400000 7000000 7300000 100000 200000 11600000 1800000 9800000 8100000 7500000 13400000 9600000 3800000 8400000 3200000 1200000 4600000 8300000 4500000 3500000 2300000 7400000 1700000 9500000 3000000 2200000 9200000 4000000 11200000 500000 9400000 14400000 10000000 6600000 12500000 1500000 9100000 7700000 7800000 10300000 9900000 8800000 5700000 10400000 11800000 5300000 12400000 2700000 11500000 3900000 0 10800000 6700000 12900000 12300000 6900000 12200000 13500000 8900000 3700000 12100000 13600000 13100000 10600000 13900000 12000000 13000000 10100000 10700000 11000000 13200000 14700000 14000000 13300000 13800000 14600000 14300000 14200000 13700000 14100000] Unique values in column ' loan_status': [' Approved', ' Rejected'] Categories (2, object): [' Approved', ' Rejected']
category_columns = df.select_dtypes(include=["category"])
category_columns
| education | self_employed | loan_status | |
|---|---|---|---|
| 0 | Graduate | No | Approved |
| 1 | Not Graduate | Yes | Rejected |
| 2 | Graduate | No | Rejected |
| 3 | Graduate | No | Rejected |
| 4 | Not Graduate | Yes | Rejected |
| ... | ... | ... | ... |
| 4264 | Graduate | Yes | Rejected |
| 4265 | Not Graduate | Yes | Approved |
| 4266 | Not Graduate | No | Rejected |
| 4267 | Not Graduate | No | Approved |
| 4268 | Graduate | No | Approved |
4269 rows × 3 columns
category_columns.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4269 entries, 0 to 4268 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 education 4269 non-null category 1 self_employed 4269 non-null category 2 loan_status 4269 non-null category dtypes: category(3) memory usage: 13.0 KB
print(category_columns.columns)
Index([' education', ' self_employed', ' loan_status'], dtype='object')
# Boşluk karakterini kaldırarak sütun adlarını yeniden adlandırma
category_columns = category_columns.rename(columns=lambda x: x.strip())
# Boşluk karakterini kaldırarak sütun adlarını yeniden adlandırma
df = df.rename(columns=lambda x: x.strip())
category_columns.education.unique()
[' Graduate', ' Not Graduate'] Categories (2, object): [' Graduate', ' Not Graduate']
category_columns["education"].value_counts().count()
2
category_columns["education"].value_counts()
education Graduate 2144 Not Graduate 2125 Name: count, dtype: int64
category_columns["education"].value_counts().plot.barh();
category_columns.self_employed.unique()
[' No', ' Yes'] Categories (2, object): [' No', ' Yes']
category_columns["self_employed"].value_counts().count()
2
category_columns["self_employed"].value_counts()
self_employed Yes 2150 No 2119 Name: count, dtype: int64
category_columns["self_employed"].value_counts().plot.barh();
category_columns.loan_status.unique()
[' Approved', ' Rejected'] Categories (2, object): [' Approved', ' Rejected']
category_columns["loan_status"].value_counts().count()
2
category_columns["loan_status"].value_counts()
loan_status Approved 2656 Rejected 1613 Name: count, dtype: int64
category_columns["loan_status"].value_counts().plot.barh();
num_columns = df.select_dtypes(include=["int64"])
num_columns.head()
| loan_id | no_of_dependents | income_annum | loan_amount | loan_term | cibil_score | residential_assets_value | commercial_assets_value | luxury_assets_value | bank_asset_value | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2 | 9600000 | 29900000 | 12 | 778 | 2400000 | 17600000 | 22700000 | 8000000 |
| 1 | 2 | 0 | 4100000 | 12200000 | 8 | 417 | 2700000 | 2200000 | 8800000 | 3300000 |
| 2 | 3 | 3 | 9100000 | 29700000 | 20 | 506 | 7100000 | 4500000 | 33300000 | 12800000 |
| 3 | 4 | 3 | 8200000 | 30700000 | 8 | 467 | 18200000 | 3300000 | 23300000 | 7900000 |
| 4 | 5 | 5 | 9800000 | 24200000 | 20 | 382 | 12400000 | 8200000 | 29400000 | 5000000 |
num_columns.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| loan_id | 4269.0 | 2.135000e+03 | 1.232498e+03 | 1.0 | 1068.0 | 2135.0 | 3202.0 | 4269.0 |
| no_of_dependents | 4269.0 | 2.498712e+00 | 1.695910e+00 | 0.0 | 1.0 | 3.0 | 4.0 | 5.0 |
| income_annum | 4269.0 | 5.059124e+06 | 2.806840e+06 | 200000.0 | 2700000.0 | 5100000.0 | 7500000.0 | 9900000.0 |
| loan_amount | 4269.0 | 1.513345e+07 | 9.043363e+06 | 300000.0 | 7700000.0 | 14500000.0 | 21500000.0 | 39500000.0 |
| loan_term | 4269.0 | 1.090045e+01 | 5.709187e+00 | 2.0 | 6.0 | 10.0 | 16.0 | 20.0 |
| cibil_score | 4269.0 | 5.999361e+02 | 1.724304e+02 | 300.0 | 453.0 | 600.0 | 748.0 | 900.0 |
| residential_assets_value | 4269.0 | 7.472617e+06 | 6.503637e+06 | -100000.0 | 2200000.0 | 5600000.0 | 11300000.0 | 29100000.0 |
| commercial_assets_value | 4269.0 | 4.973155e+06 | 4.388966e+06 | 0.0 | 1300000.0 | 3700000.0 | 7600000.0 | 19400000.0 |
| luxury_assets_value | 4269.0 | 1.512631e+07 | 9.103754e+06 | 300000.0 | 7500000.0 | 14600000.0 | 21700000.0 | 39200000.0 |
| bank_asset_value | 4269.0 | 4.976692e+06 | 3.250185e+06 | 0.0 | 2300000.0 | 4600000.0 | 7100000.0 | 14700000.0 |
print("no_of_dependents")
print("---------------------------------")
print("Ortalama: " + str(num_columns["no_of_dependents"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["no_of_dependents"].count()))
print("Maksimum Değer: " + str(num_columns["no_of_dependents"].max()))
print("Minimum Değer: " + str(num_columns["no_of_dependents"].min()))
print("Medyan: " + str(num_columns["no_of_dependents"].median()))
print("Standart Sapma: " + str(num_columns["no_of_dependents"].std()))
print("---------------------------------\n\n")
print("income_annum")
print("---------------------------------")
print("Ortalama: " + str(num_columns["income_annum"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["income_annum"].count()))
print("Maksimum Değer: " + str(num_columns["income_annum"].max()))
print("Minimum Değer: " + str(num_columns["income_annum"].min()))
print("Medyan: " + str(num_columns["income_annum"].median()))
print("Standart Sapma: " + str(num_columns["income_annum"].std()))
print("---------------------------------\n\n")
print("loan_amount")
print("---------------------------------")
print("Ortalama: " + str(num_columns["loan_amount"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["loan_amount"].count()))
print("Maksimum Değer: " + str(num_columns["loan_amount"].max()))
print("Minimum Değer: " + str(num_columns["loan_amount"].min()))
print("Medyan: " + str(num_columns["loan_amount"].median()))
print("Standart Sapma: " + str(num_columns["loan_amount"].std()))
print("---------------------------------\n\n")
print("loan_term")
print("---------------------------------")
print("Ortalama: " + str(num_columns["loan_term"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["loan_term"].count()))
print("Maksimum Değer: " + str(num_columns["loan_term"].max()))
print("Minimum Değer: " + str(num_columns["loan_term"].min()))
print("Medyan: " + str(num_columns["loan_term"].median()))
print("Standart Sapma: " + str(num_columns["loan_term"].std()))
print("---------------------------------\n\n")
print("cibil_score")
print("---------------------------------")
print("Ortalama: " + str(num_columns["cibil_score"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["cibil_score"].count()))
print("Maksimum Değer: " + str(num_columns["cibil_score"].max()))
print("Minimum Değer: " + str(num_columns["cibil_score"].min()))
print("Medyan: " + str(num_columns["cibil_score"].median()))
print("Standart Sapma: " + str(num_columns["cibil_score"].std()))
print("---------------------------------\n\n")
print("residential_assets_value")
print("---------------------------------")
print("Ortalama: " + str(num_columns["residential_assets_value"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["residential_assets_value"].count()))
print("Maksimum Değer: " + str(num_columns["residential_assets_value"].max()))
print("Minimum Değer: " + str(num_columns["residential_assets_value"].min()))
print("Medyan: " + str(num_columns["residential_assets_value"].median()))
print("Standart Sapma: " + str(num_columns["residential_assets_value"].std()))
print("---------------------------------\n\n")
print("commercial_assets_value")
print("---------------------------------")
print("Ortalama: " + str(num_columns["commercial_assets_value"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["commercial_assets_value"].count()))
print("Maksimum Değer: " + str(num_columns["commercial_assets_value"].max()))
print("Minimum Değer: " + str(num_columns["commercial_assets_value"].min()))
print("Medyan: " + str(num_columns["commercial_assets_value"].median()))
print("Standart Sapma: " + str(num_columns["commercial_assets_value"].std()))
print("---------------------------------\n\n")
print("luxury_assets_value")
print("---------------------------------")
print("Ortalama: " + str(num_columns["luxury_assets_value"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["luxury_assets_value"].count()))
print("Maksimum Değer: " + str(num_columns["luxury_assets_value"].max()))
print("Minimum Değer: " + str(num_columns["luxury_assets_value"].min()))
print("Medyan: " + str(num_columns["luxury_assets_value"].median()))
print("Standart Sapma: " + str(num_columns["luxury_assets_value"].std()))
print("---------------------------------\n\n")
print("bank_asset_value")
print("---------------------------------")
print("Ortalama: " + str(num_columns["bank_asset_value"].mean()))
print("Dolu Gözlem Sayısı: " + str(num_columns["bank_asset_value"].count()))
print("Maksimum Değer: " + str(num_columns["bank_asset_value"].max()))
print("Minimum Değer: " + str(num_columns["bank_asset_value"].min()))
print("Medyan: " + str(num_columns["bank_asset_value"].median()))
print("Standart Sapma: " + str(num_columns["bank_asset_value"].std()))
print("---------------------------------\n\n")
no_of_dependents --------------------------------- Ortalama: 2.4987116420707425 Dolu Gözlem Sayısı: 4269 Maksimum Değer: 5 Minimum Değer: 0 Medyan: 3.0 Standart Sapma: 1.695910160711101 --------------------------------- income_annum --------------------------------- Ortalama: 5059123.9166081045 Dolu Gözlem Sayısı: 4269 Maksimum Değer: 9900000 Minimum Değer: 200000 Medyan: 5100000.0 Standart Sapma: 2806839.831818462 --------------------------------- loan_amount --------------------------------- Ortalama: 15133450.456781447 Dolu Gözlem Sayısı: 4269 Maksimum Değer: 39500000 Minimum Değer: 300000 Medyan: 14500000.0 Standart Sapma: 9043362.984842854 --------------------------------- loan_term --------------------------------- Ortalama: 10.900445069102835 Dolu Gözlem Sayısı: 4269 Maksimum Değer: 20 Minimum Değer: 2 Medyan: 10.0 Standart Sapma: 5.7091872792452 --------------------------------- cibil_score --------------------------------- Ortalama: 599.9360505973295 Dolu Gözlem Sayısı: 4269 Maksimum Değer: 900 Minimum Değer: 300 Medyan: 600.0 Standart Sapma: 172.43040073575904 --------------------------------- residential_assets_value --------------------------------- Ortalama: 7472616.537830873 Dolu Gözlem Sayısı: 4269 Maksimum Değer: 29100000 Minimum Değer: -100000 Medyan: 5600000.0 Standart Sapma: 6503636.587664101 --------------------------------- commercial_assets_value --------------------------------- Ortalama: 4973155.3056922 Dolu Gözlem Sayısı: 4269 Maksimum Değer: 19400000 Minimum Değer: 0 Medyan: 3700000.0 Standart Sapma: 4388966.089638461 --------------------------------- luxury_assets_value --------------------------------- Ortalama: 15126305.926446475 Dolu Gözlem Sayısı: 4269 Maksimum Değer: 39200000 Minimum Değer: 300000 Medyan: 14600000.0 Standart Sapma: 9103753.665256497 --------------------------------- bank_asset_value --------------------------------- Ortalama: 4976692.433825252 Dolu Gözlem Sayısı: 4269 Maksimum Değer: 14700000 Minimum Değer: 0 Medyan: 4600000.0 Standart Sapma: 3250185.3056957023 ---------------------------------
Veri setinde sütunların başlarında boşluk atanıyor otomatik olarak bu da çoğu işlemde sorun yaratmakta bu durumu önlemek adına aşağıdaki kod satırı ile veri setini bu konuda düzenliyoruz.¶
# Boşluk karakterini kaldırarak sütun adlarını yeniden adlandırma
df = df.rename(columns=lambda x: x.strip())
plot.barh¶
Eğitim Seviyesine Göre Frekanslar¶
plt.figure(figsize=(10, 6))
df['education'].value_counts().plot.barh(color='skyblue')
plt.title('Eğitim Seviyesine Göre Frekanslar')
plt.xlabel('Frekans')
plt.ylabel('Eğitim Seviyesi')
plt.show()
Kredi Durumuna Göre Meslek Dağılımı¶
plt.figure(figsize=(10, 6))
df[df['loan_status'] == ' Approved']['self_employed'].value_counts().plot.barh(color='green', label='Approved')
df[df['loan_status'] == ' Rejected']['self_employed'].value_counts().plot.barh(color='red', label='Rejected', alpha=0.5)
plt.title('Kredi Durumuna Göre Meslek Dağılımı')
plt.xlabel('Frekans')
plt.ylabel('Meslek Durumu')
plt.legend()
plt.show()
Eğitim seviyesine göre kredi onay oranlarının farkı¶
# Eğitim seviyesine göre kredi onay oranlarının farkını inceleme
education_loan_status = df.groupby('education')['loan_status'].value_counts(normalize=True).unstack()
# Horizontal bar plot oluşturma
education_loan_status.plot.barh(stacked=True)
plt.title('Eğitim Seviyesine Göre Kredi Onay Oranları')
plt.xlabel('Onay Oranı')
plt.ylabel('Eğitim Seviyesi')
plt.legend(title='Kredi Durumu', loc='upper right')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\1407193328.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
education_loan_status = df.groupby('education')['loan_status'].value_counts(normalize=True).unstack()
barplot¶
Gelir Seviyesine Göre Kredi Miktarları¶
# Gelir seviyesini belirli aralıklara bölmek
df['income_group'] = pd.cut(df['income_annum'], bins=20)
# Gelir seviyesi gruplarına göre kredi miktarlarının ortalamasını hesaplamak
grouped_df = df.groupby('income_group')['loan_amount'].mean().reset_index()
# Gelir seviyesi gruplarına göre kredi miktarlarını görselleştirmek
sns.barplot(data=grouped_df, x='income_group', y='loan_amount')
plt.title('Gelir Seviyesine Göre Kredi Miktarları')
plt.xlabel('Gelir Grupları')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=90)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\1707770033.py:5: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_df = df.groupby('income_group')['loan_amount'].mean().reset_index()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_vals = vals.groupby(grouper)
Eğitim Seviyesine Göre Ortalama Kredi Miktarı¶
plt.figure(figsize=(10, 6))
sns.barplot(data=df, x='education', y='loan_amount', estimator=np.mean)
plt.title('Eğitim Seviyesine Göre Ortalama Kredi Miktarı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı¶
plt.figure(figsize=(10, 6))
sns.barplot(data=df, x='loan_status', y='loan_amount', hue='education', ci=None)
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Kredi Miktarı')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\1795589591.py:2: FutureWarning: The `ci` parameter is deprecated. Use `errorbar=None` for the same effect. sns.barplot(data=df, x='loan_status', y='loan_amount', hue='education', ci=None) C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Gelir¶
plt.figure(figsize=(10, 6))
sns.barplot(data=df, x='loan_status', y='income_annum', hue='education', estimator=np.mean)
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Gelir')
plt.xlabel('Kredi Durumu')
plt.ylabel('Ortalama Gelir')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
catplot¶
Kredi onay durumu, gelir seviyesi ve eğitim seviyesi arasındaki ilişki¶
# Kredi onay durumu, gelir seviyesi ve eğitim seviyesi arasındaki ilişkiyi görselleştirme
sns.catplot(data=df, x='loan_status', y='income_annum', hue='education', kind='box')
plt.title('Kredi Onay Durumu ve Gelir Seviyesi ile Eğitim Seviyesinin İlişkisi')
plt.xlabel('Kredi Durumu')
plt.ylabel('Gelir (Yıllık)')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper) C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
Eğitim Seviyesine Göre Kredi Durumu Dağılımı¶
sns.catplot(data=df, x='education', hue='loan_status', kind='count', height=6, aspect=1.5)
plt.title('Eğitim Seviyesine Göre Kredi Durumu Dağılımı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Gözlem Sayısı')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper) C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
Eğitim Seviyesine Göre Kredi Miktarı Dağılımı¶
sns.catplot(data=df, x='education', y='loan_amount', kind='box', height=6, aspect=1.5)
plt.title('Eğitim Seviyesine Göre Kredi Miktarı Dağılımı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Kredi Miktarı')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper) C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı Dağılımı¶
sns.catplot(data=df, x='loan_status', y='loan_amount', kind='violin', hue='education', height=6, aspect=1.5)
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı Dağılımı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Kredi Miktarı')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper) C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
groupby¶
# Kredi onay durumuna göre ortalama kredi miktarını inceleme
loan_status_loan_amount = df.groupby('loan_status')['loan_amount'].mean().reset_index()
# Bar plot oluşturma
plt.bar(loan_status_loan_amount['loan_status'], loan_status_loan_amount['loan_amount'])
plt.title('Kredi Onay Durumuna Göre Ortalama Kredi Miktarı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Ortalama Kredi Miktarı')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\2061942401.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
loan_status_loan_amount = df.groupby('loan_status')['loan_amount'].mean().reset_index()
Eğitim Seviyesine Göre Ortalama Kredi Miktarı¶
df.groupby('education')['loan_amount'].mean().plot.bar(color='skyblue')
plt.title('Eğitim Seviyesine Göre Ortalama Kredi Miktarı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\3426761793.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
df.groupby('education')['loan_amount'].mean().plot.bar(color='skyblue')
Kredi Durumuna Göre Ortalama Gelir¶
df.groupby('loan_status')['income_annum'].mean().plot.bar(color='lightgreen')
plt.title('Kredi Durumuna Göre Ortalama Gelir')
plt.xlabel('Kredi Durumu')
plt.ylabel('Ortalama Gelir')
plt.xticks(rotation=0)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\1141899142.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
df.groupby('loan_status')['income_annum'].mean().plot.bar(color='lightgreen')
Eğitim Seviyesine ve Kredi Durumuna Göre Ortalama Kredi Miktarı¶
df.groupby(['education', 'loan_status'])['loan_amount'].mean().unstack().plot(kind='bar', stacked=True)
plt.title('Eğitim Seviyesine ve Kredi Durumuna Göre Ortalama Kredi Miktarı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=45)
plt.legend(title='Kredi Durumu')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_19200\2422632664.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. df.groupby(['education', 'loan_status'])['loan_amount'].mean().unstack().plot(kind='bar', stacked=True)
histplot¶
# Cibil skoru dağılımını görselleştirme
sns.histplot(data=df, x='cibil_score', kde=True)
plt.title('Cibil Skoru Dağılımı')
plt.xlabel('Cibil Skoru')
plt.ylabel('Frekans')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
Gelir Dağılım¶
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='income_annum', bins=20, kde=True, color='skyblue')
plt.title('Gelir Dağılımı')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Frekans')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
Kredi Miktarı Dağılımı¶
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='loan_amount', bins=30, kde=True, color='orange')
plt.title('Kredi Miktarı Dağılımı')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Frekans')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
Cibil Skoru Dağılımı¶
plt.figure(figsize=(10, 6))
sns.histplot(data=df, x='cibil_score', bins=20, kde=True, color='green')
plt.title('Cibil Skoru Dağılımı')
plt.xlabel('Cibil Skoru')
plt.ylabel('Frekans')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
kdeplot¶
# Kredi miktarı ve gelir seviyesi arasındaki yoğunluğu görselleştirme
sns.kdeplot(data=df, x='loan_amount', y='income_annum', cmap='Blues', fill=True)
plt.title('Kredi Miktarı ve Gelir Seviyesi Yoğunluğu')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Gelir (Yıllık)')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
Gelir Dağılımı (KDE)¶
plt.figure(figsize=(10, 6))
sns.kdeplot(data=df, x='income_annum', hue='loan_status', fill=True, common_norm=False)
plt.title('Gelir Dağılımı (KDE)')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Yoğunluk')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
data_subset = grouped_data.get_group(pd_key)
Kredi Miktarı Dağılımı (KDE)¶
plt.figure(figsize=(10, 6))
sns.kdeplot(data=df, x='loan_amount', hue='education', fill=True, common_norm=False)
plt.title('Kredi Miktarı Dağılımı (KDE)')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Yoğunluk')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
data_subset = grouped_data.get_group(pd_key)
Cibil Skoru Dağılımı (KDE)¶
plt.figure(figsize=(10, 6))
sns.kdeplot(data=df, x='cibil_score', hue='loan_status', fill=True, common_norm=False)
plt.title('Cibil Skoru Dağılımı (KDE)')
plt.xlabel('Cibil Skoru')
plt.ylabel('Yoğunluk')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
data_subset = grouped_data.get_group(pd_key)
boxplot¶
# Eğitim seviyesine göre cibil skorlarının dağılımını görselleştirme
sns.boxplot(data=df, x='education', y='cibil_score')
plt.title('Eğitim Seviyesine Göre Cibil Skorları Dağılımı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Cibil Skoru')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
Eğitim Seviyesine Göre Gelir Dağılımı¶
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='education', y='income_annum')
plt.title('Eğitim Seviyesine Göre Gelir Dağılımı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Gelir (Yıllık)')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı Dağılımı¶
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='loan_status', y='loan_amount', hue='education')
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Kredi Miktarı Dağılımı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Kredi Miktarı')
plt.xticks(rotation=45)
plt.legend(title='Eğitim Seviyesi')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
Kredi Durumu ve Eğitim Seviyesine Göre Cibil Skoru Dağılımı¶
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='loan_status', y='cibil_score', hue='education')
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Cibil Skoru Dağılımı')
plt.xlabel('Kredi Durumu')
plt.ylabel('Cibil Skoru')
plt.xticks(rotation=45)
plt.legend(title='Eğitim Seviyesi')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\categorical.py:641: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning. grouped_vals = vals.groupby(grouper)
scatterplot¶
# Kredi miktarı ile banka varlıkları arasındaki ilişkiyi görselleştirme
sns.scatterplot(data=df, x='loan_amount', y='bank_asset_value')
plt.title('Kredi Miktarı ile Banka Varlıkları Arasındaki İlişki')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Banka Varlıkları')
plt.show()
Gelir ve Kredi Miktarı Arasındaki İlişki¶
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='income_annum', y='loan_amount', hue='loan_status')
plt.title('Gelir ve Kredi Miktarı Arasındaki İlişki')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Kredi Durumu')
plt.show()
Cibil Skoru ve Kredi Miktarı Arasındaki İlişki¶
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='cibil_score', y='loan_amount', hue='education')
plt.title('Cibil Skoru ve Kredi Miktarı Arasındaki İlişki')
plt.xlabel('Cibil Skoru')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Eğitim Seviyesi')
plt.show()
Bağımlı Sayısı ve Kredi Miktarı Arasındaki İlişki¶
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='no_of_dependents', y='loan_amount', hue='self_employed')
plt.title('Bağımlı Sayısı ve Kredi Miktarı Arasındaki İlişki')
plt.xlabel('Bağımlı Sayısı')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Meslek Durumu')
plt.show()
lmplot¶
# Gelir seviyesi ile kredi miktarı arasındaki ilişkiyi görselleştirme ve doğrusal modelleme
sns.lmplot(data=df, x='income_annum', y='loan_amount')
plt.title('Gelir Seviyesi ile Kredi Miktarı Arasındaki İlişki')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Kredi Miktarı')
plt.show()
Gelir ve Kredi Miktarı İlişkisi (Lineer Model)¶
sns.lmplot(data=df, x='income_annum', y='loan_amount', hue='loan_status', height=6)
plt.title('Gelir ve Kredi Miktarı İlişkisi (Lineer Model)')
plt.xlabel('Gelir (Yıllık)')
plt.ylabel('Kredi Miktarı')
plt.show()
Cibil Skoru ve Kredi Miktarı İlişkisi (Lineer Model)¶
sns.lmplot(data=df, x='cibil_score', y='loan_amount', hue='education', height=6)
plt.title('Cibil Skoru ve Kredi Miktarı İlişkisi (Lineer Model)')
plt.xlabel('Cibil Skoru')
plt.ylabel('Kredi Miktarı')
plt.show()
Bağımlı Sayısı ve Kredi Miktarı İlişkisi (Lineer Model)¶
sns.lmplot(data=df, x='no_of_dependents', y='loan_amount', hue='self_employed', height=6)
plt.title('Bağımlı Sayısı ve Kredi Miktarı İlişkisi (Lineer Model)')
plt.xlabel('Bağımlı Sayısı')
plt.ylabel('Kredi Miktarı')
plt.show()
pairplot¶
# Tüm sayısal değişkenler arasındaki ilişkiyi görselleştirme
sns.pairplot(df)
plt.title('Tüm Sayısal Değişkenler Arasındaki İlişki')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
Sayısal Değişkenler Arasındaki İlişkiler¶
sns.pairplot(df[['income_annum', 'loan_amount', 'cibil_score']])
plt.title('Sayısal Değişkenler Arasındaki İlişkiler')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
Sayısal Değişkenler ve Kredi Durumu Arasındaki İlişkiler¶
sns.pairplot(df[['income_annum', 'loan_amount', 'no_of_dependents', 'loan_status']], hue='loan_status')
plt.title('Sayısal Değişkenler ve Kredi Durumu Arasındaki İlişkiler')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
data_subset = grouped_data.get_group(pd_key)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
data_subset = grouped_data.get_group(pd_key)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
data_subset = grouped_data.get_group(pd_key)
Kredi Miktarı, Cibil Skoru ve Bağımlı Sayısı Arasındaki İlişkiler¶
sns.pairplot(df[['loan_amount', 'cibil_score', 'no_of_dependents','education']], hue='education')
plt.title('Kredi Miktarı, Cibil Skoru ve Bağımlı Sayısı Arasındaki İlişkiler')
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
data_subset = grouped_data.get_group(pd_key)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
data_subset = grouped_data.get_group(pd_key)
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
grouped_data = data.groupby(
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1075: FutureWarning: When grouping with a length-1 list-like, you will need to pass a length-1 tuple to get_group in a future version of pandas. Pass `(name,)` instead of `name` to silence this warning.
data_subset = grouped_data.get_group(pd_key)
pivot¶
Eğitim Seviyesi ve Kredi Durumuna Göre Ortalama Kredi Miktarı¶
pivot_table = df.pivot_table(index='education', columns='loan_status', values='loan_amount', aggfunc='mean')
pivot_table.plot(kind='bar', figsize=(10, 6))
plt.title('Eğitim Seviyesi ve Kredi Durumuna Göre Ortalama Kredi Miktarı')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Ortalama Kredi Miktarı')
plt.xticks(rotation=45)
plt.legend(title='Kredi Durumu')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\364336002.py:1: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior pivot_table = df.pivot_table(index='education', columns='loan_status', values='loan_amount', aggfunc='mean')
Meslek Durumu ve Kredi Durumuna Göre Ortalama Gelir¶
pivot_table2 = df.pivot_table(index='self_employed', columns='loan_status', values='income_annum', aggfunc='mean')
pivot_table2.plot(kind='bar', figsize=(10, 6))
plt.title('Meslek Durumu ve Kredi Durumuna Göre Ortalama Gelir')
plt.xlabel('Meslek Durumu')
plt.ylabel('Ortalama Gelir (Yıllık)')
plt.xticks(rotation=45)
plt.legend(title='Kredi Durumu')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\980057846.py:1: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior pivot_table2 = df.pivot_table(index='self_employed', columns='loan_status', values='income_annum', aggfunc='mean')
Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Cibil Skoru¶
pivot_table3 = df.pivot_table(index='loan_status', columns='education', values='cibil_score', aggfunc='mean')
pivot_table3.plot(kind='bar', figsize=(10, 6))
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Cibil Skoru')
plt.xlabel('Kredi Durumu')
plt.ylabel('Ortalama Cibil Skoru')
plt.xticks(rotation=45)
plt.legend(title='Eğitim Seviyesi')
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\4131333369.py:1: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior pivot_table3 = df.pivot_table(index='loan_status', columns='education', values='cibil_score', aggfunc='mean')
heatmap¶
Değişkenler Arasındaki Korelasyon¶
correlation_matrix = num_columns.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Değişkenler Arasındaki Korelasyon')
plt.show()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[45], line 1 ----> 1 correlation_matrix = num_columns.corr() 2 plt.figure(figsize=(10, 8)) 3 sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f') NameError: name 'num_columns' is not defined
Eğitim ve Meslek Durumuna Göre Ortalama Kredi Miktarı¶
plt.figure(figsize=(10, 8))
sns.heatmap(df.pivot_table(index='education', columns='self_employed', values='loan_amount', aggfunc='mean'), annot=True, cmap='viridis')
plt.title('Eğitim ve Meslek Durumuna Göre Ortalama Kredi Miktarı')
plt.xlabel('Meslek Durumu')
plt.ylabel('Eğitim Seviyesi')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\619796474.py:2: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior sns.heatmap(df.pivot_table(index='education', columns='self_employed', values='loan_amount', aggfunc='mean'), annot=True, cmap='viridis')
Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Cibil Skoru¶
plt.figure(figsize=(10, 8))
sns.heatmap(df.pivot_table(index='loan_status', columns='education', values='cibil_score', aggfunc='mean'), annot=True, cmap='rocket')
plt.title('Kredi Durumu ve Eğitim Seviyesine Göre Ortalama Cibil Skoru')
plt.xlabel('Eğitim Seviyesi')
plt.ylabel('Kredi Durumu')
plt.xticks(rotation=45)
plt.show()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\2180551148.py:2: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior sns.heatmap(df.pivot_table(index='loan_status', columns='education', values='cibil_score', aggfunc='mean'), annot=True, cmap='rocket')
lineplot¶
Kredi Süresi ve Kredi Miktarı İlişkisi¶
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x='loan_term', y='loan_amount', hue='loan_status')
plt.title('Kredi Süresi ve Kredi Miktarı İlişkisi')
plt.xlabel('Kredi Süresi (Ay)')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Kredi Durumu')
plt.show()
Cibil Skoru ve Kredi Miktarı İlişkisi¶
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x='cibil_score', y='loan_amount', hue='education')
plt.title('Cibil Skoru ve Kredi Miktarı İlişkisi')
plt.xlabel('Cibil Skoru')
plt.ylabel('Kredi Miktarı')
plt.legend(title='Eğitim Seviyesi')
plt.show()
Kredi Miktarı ve Gelir İlişkisi¶
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x='loan_amount', y='income_annum', hue='loan_status')
plt.title('Kredi Miktarı ve Gelir İlişkisi')
plt.xlabel('Kredi Miktarı')
plt.ylabel('Gelir (Yıllık)')
plt.legend(title='Kredi Durumu')
plt.show()
İstatistik¶
1.Örneklem¶
# Veri setinin %10'luk bir örneklemini alma
sample_df = df.sample(frac=0.1, random_state=1)
# Örneklem verisini görüntüleme
print(sample_df.head())
2.Betimsel İstatistik¶
Varyans Hesaplama Öncelikle, örneklem verimizin belirli sütunları için varyans hesaplayalım. Varyans, bir veri kümesinin yayılımını ölçen bir istatistiksel değerdir.
# Varyans hesaplama
variances = sample_df.var(numeric_only=True)
print("Varyanslar:")
print(variances)
Kovaryans Hesaplama Kovaryans, iki değişkenin birlikte nasıl değiştiğini ölçer. Tüm çiftler için kovaryans matrisini hesaplayabiliriz.
# Kovaryans matrisini hesaplama
cov_matrix = sample_df.cov(numeric_only=True)
print("Kovaryans Matrisi:")
print(cov_matrix)
Korelasyon Hesaplama Korelasyon, iki değişken arasındaki doğrusal ilişkiyi ölçer. Tüm çiftler için korelasyon matrisini hesaplayabiliriz.
# Korelasyon matrisini hesaplama
corr_matrix = sample_df.corr(numeric_only=True)
print("Korelasyon Matrisi:")
print(corr_matrix)
3.Fiyat Stratejisi Karar Destek¶
Bu adımda, genellikle gelir ve kredi miktarı gibi ekonomik değişkenlerin analizi yapılır. Bu analizde, örneklem verimizdeki bazı sütunların istatistiksel özetlerini kullanarak karar destek sistemi oluşturabiliriz.
# Gelir ve kredi miktarının özet istatistikleri
income_stats = sample_df['income_annum'].describe()
loan_amount_stats = sample_df['loan_amount'].describe()
print("Gelir İstatistikleri:")
print(income_stats)
print("\nKredi Miktarı İstatistikleri:")
print(loan_amount_stats)
Gelir ve kredi miktarı arasındaki korelasyonu hesaplayalım:
# Gelir ve kredi miktarı arasındaki korelasyon
income_loan_corr = sample_df['income_annum'].corr(sample_df['loan_amount'])
print("\nGelir ve Kredi Miktarı Arasındaki Korelasyon: {:.2f}".format(income_loan_corr))
4.Olasılık Dağılımları¶
Bernoulli Dağılımı¶
Bernoulli dağılımı, iki olası sonuç (başarı ve başarısızlık) içeren deneyler için kullanılır. self_employed veya loan_status sütunları, Bernoulli dağılımı için uygun olabilir.
from scipy.stats import bernoulli
# self_employed sütunu için Bernoulli dağılımı
p_self_employed = sample_df['self_employed'].value_counts(normalize=True)[' Yes']
bernoulli_dist = bernoulli(p_self_employed)
print("Bernoulli Dağılımı (self_employed 'Yes' olma olasılığı): {:.2f}".format(p_self_employed))
print("Beklenen değer (mean): {:.2f}".format(bernoulli_dist.mean()))
print("Varyans: {:.2f}".format(bernoulli_dist.var()))
Binom Dağılımı¶
Binom dağılımı, belirli bir sayıda bağımsız Bernoulli denemesinin sonuçlarını modellemek için kullanılır.
from scipy.stats import binom
# Örnek: 10 denemede self_employed olanların sayısı
n = 10
binom_dist = binom(n, p_self_employed)
print("Binom Dağılımı (10 denemede 'Yes' sayısı):")
print("Beklenen değer (mean): {:.2f}".format(binom_dist.mean()))
print("Varyans: {:.2f}".format(binom_dist.var()))
for k in range(n + 1):
print(f"10 denemede {k} kez 'Yes' olma olasılığı: {binom_dist.pmf(k):.4f}")
Geometrik Dağılım¶
Geometrik dağılım, ilk başarıya kadar geçen deneme sayısını modellemek için kullanılır.
from scipy.stats import geom
geom_dist = geom(p_self_employed)
print("Geometrik Dağılım (ilk 'Yes' olana kadar geçen deneme sayısı):")
print("Beklenen değer (mean): {:.2f}".format(geom_dist.mean()))
print("Varyans: {:.2f}".format(geom_dist.var()))
for k in range(1, 11):
print(f"İlk 'Yes' için {k}. deneme olasılığı: {geom_dist.pmf(k):.4f}")
Poisson Dağılımı¶
Poisson dağılımı, belirli bir zaman aralığında belirli olayların sayısını modellemek için kullanılır.
from scipy.stats import poisson
# Örnek: Saatte 2.5 kredi başvurusu ortalaması ile Poisson dağılımı
lambda_ = 2.5
poisson_dist = poisson(mu=lambda_)
print("Poisson Dağılımı (ortalama 2.5 kredi başvurusu):")
print("Beklenen değer (mean): {:.2f}".format(poisson_dist.mean()))
print("Varyans: {:.2f}".format(poisson_dist.var()))
for k in range(10):
print(f"{k} kredi başvurusu olasılığı: {poisson_dist.pmf(k):.4f}")
Normal Dağılım¶
Normal dağılım, sürekli veri için en yaygın dağılımdır. income_annum veya loan_amount sütunları normal dağılımı analiz etmek için kullanılabilir.
from scipy.stats import norm
# Gelir verisi için normal dağılım
mean_income = sample_df['income_annum'].mean()
std_income = sample_df['income_annum'].std()
norm_dist = norm(mean_income, std_income)
print("Normal Dağılım (gelir verisi):")
print("Beklenen değer (mean): {:.2f}".format(norm_dist.mean()))
print("Varyans: {:.2f}".format(norm_dist.var()))
print(f"68% olasılıkla gelir {norm_dist.ppf(0.16):.2f} ile {norm_dist.ppf(0.84):.2f} arasında olacaktır.")
Üssel Dağılım¶
Üssel dağılım, olaylar arasındaki süreyi modellemek için kullanılır.
from scipy.stats import expon
# Örnek: Ortalama 1 kredi başvuru süresi ile üssel dağılım
lambda_exp = 1.0
expon_dist = expon(scale=1/lambda_exp)
print("Üssel Dağılım (ortalama 1 kredi başvuru süresi):")
print("Beklenen değer (mean): {:.2f}".format(expon_dist.mean()))
print("Varyans: {:.2f}".format(expon_dist.var()))
for k in range(1, 11):
print(f"{k} zaman birimi içinde olay gerçekleşme olasılığı: {expon_dist.cdf(k):.4f}")
5.Hipotez Testi¶
Tek Örneklem T Testi¶
Tek örneklem T testi, bir örneklem ortalamasının bilinen bir popülasyon ortalamasına eşit olup olmadığını test eder. Örneğin, gelir verisinin belirli bir değere eşit olup olmadığını test edebiliriz.
from scipy.stats import ttest_1samp
# Örnek: Gelir verisinin ortalamasının 5,000,000'a eşit olup olmadığını test etme
popmean = 5000000
t_stat, p_value = ttest_1samp(sample_df['income_annum'], popmean)
print("Tek Örneklem T Testi:")
print("T-istatistiği: {:.2f}".format(t_stat))
print("p-değeri: {:.4f}".format(p_value))
Bağımsız İki Örneklem T Testi (AB Testi)¶
Bu test, iki bağımsız örneklemin ortalamalarının eşit olup olmadığını test eder. Örneğin, self_employed olan ve olmayan kişilerin gelir ortalamalarını karşılaştırabiliriz.
from scipy.stats import ttest_ind
# Örnek: self_employed olan ve olmayan kişilerin gelir ortalamalarını karşılaştırma
income_self_employed = sample_df[sample_df['self_employed'] == ' Yes']['income_annum']
income_not_self_employed = sample_df[sample_df['self_employed'] == ' No']['income_annum']
t_stat, p_value = ttest_ind(income_self_employed, income_not_self_employed, equal_var=False)
print("Bağımsız İki Örneklem T Testi (self_employed 'Yes' ve 'No' için):")
print("T-istatistiği: {:.2f}".format(t_stat))
print("p-değeri: {:.4f}".format(p_value))
İki Örneklem Oran Testi¶
Bu test, iki oran arasındaki farkın istatistiksel olarak anlamlı olup olmadığını test eder. Örneğin, loan_status oranlarını karşılaştırabiliriz.
from scipy.stats import chi2_contingency
# 'Approved' ve 'Rejected' gruplarının gözlem sayılarını alalım
approved_count = sample_df['loan_status'].value_counts()[' Approved']
rejected_count = sample_df['loan_status'].value_counts()[' Rejected']
# Toplam gözlem sayısını bulalım
total_obs = len(sample_df)
# Beklenen frekansları hesaplayalım (Approved oranı * toplam gözlem sayısı)
expected_approved = total_obs * (approved_count / total_obs)
expected_rejected = total_obs * (rejected_count / total_obs)
# Gözlemlenen ve beklenen frekansları içeren bir tabloyu oluşturalım
observed = [approved_count, rejected_count]
expected = [expected_approved, expected_rejected]
# Chi-kare testini yapalım
chi2_stat, p_value, dof, expected = chi2_contingency([observed, expected])
print("Chi-kare Testi:")
print("Chi-kare istatistiği: {:.2f}".format(chi2_stat))
print("p-değeri: {:.4f}".format(p_value))
Chi-kare testi sonucu istatistiksel olarak anlamlı bir ilişki tespit etmediğini gösteriyor. P-değeri 0.05'ten büyük olduğu için, 'Approved' ve 'Rejected' grupları arasında anlamlı bir fark olmadığını söyleyebiliriz.
Bu sonuç, 'loan_status' değişkeninin 'Approved' ve 'Rejected' grupları arasında dağılımının rastgele olmadığını, yani belirli bir tür kredi başvurusunun diğerinden daha yaygın olmadığını gösteriyor.
Eğer başka bir analiz veya sorunuz varsa, lütfen belirtin, yardımcı olmaktan memnuniyet duyarım.
Varyans Analizi (ANOVA)¶
ANOVA, üç veya daha fazla grup ortalamaları arasındaki farkları test eder. Örneğin, education değişkenine göre gelir farklılıklarını test edebiliriz.
from scipy.stats import f_oneway
# Örnek: education değişkenine göre gelir farklılıkları
income_graduate = sample_df[sample_df['education'] == ' Graduate']['income_annum']
income_not_graduate = sample_df[sample_df['education'] == ' Not Graduate']['income_annum']
f_stat, p_value = f_oneway(income_graduate, income_not_graduate)
print("Varyans Analizi (ANOVA):")
print("F-istatistiği: {:.2f}".format(f_stat))
print("p-değeri: {:.4f}".format(p_value))
Korelasyon Analizi¶
Korelasyon analizi, iki değişken arasındaki ilişkinin gücünü ve yönünü belirler. Örneğin, gelir ve kredi miktarı arasındaki korelasyonu analiz edebiliriz.
# Gelir ve kredi miktarı arasındaki korelasyon
income_loan_corr = sample_df['income_annum'].corr(sample_df['loan_amount'])
print("\nGelir ve Kredi Miktarı Arasındaki Korelasyon: {:.2f}".format(income_loan_corr))
Veri Ön İşleme
loan_id değerine ihtiyacımız olmayacağı için o sütunu siliyorum ve category türündeki değişkenlerimi sınıflandırma problemine hazırlayabilmek adına sayısal verilere dönüştüreceğim.¶
#df_number = df.drop(columns=['loan_id','income_group'],inplace=False)
df_number = df.drop(columns=['loan_id'],inplace=False)
df_number = df_number.select_dtypes(include=["int64"])
#df_category = df.drop(columns=['loan_id','income_group'],inplace=False)
df_category = df.drop(columns=['loan_id'],inplace=False)
df_category = df_category.select_dtypes(include=["category"])
df_number.head()
| no_of_dependents | income_annum | loan_amount | loan_term | cibil_score | residential_assets_value | commercial_assets_value | luxury_assets_value | bank_asset_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 9600000 | 29900000 | 12 | 778 | 2400000 | 17600000 | 22700000 | 8000000 |
| 1 | 0 | 4100000 | 12200000 | 8 | 417 | 2700000 | 2200000 | 8800000 | 3300000 |
| 2 | 3 | 9100000 | 29700000 | 20 | 506 | 7100000 | 4500000 | 33300000 | 12800000 |
| 3 | 3 | 8200000 | 30700000 | 8 | 467 | 18200000 | 3300000 | 23300000 | 7900000 |
| 4 | 5 | 9800000 | 24200000 | 20 | 382 | 12400000 | 8200000 | 29400000 | 5000000 |
df_category.head()
| education | self_employed | loan_status | |
|---|---|---|---|
| 0 | Graduate | No | Approved |
| 1 | Not Graduate | Yes | Rejected |
| 2 | Graduate | No | Rejected |
| 3 | Graduate | No | Rejected |
| 4 | Not Graduate | Yes | Rejected |
Aykırı Gözlem analizi (Outlier Analysis)
import seaborn as sns
import matplotlib.pyplot as plt
# Tüm sayısal değişkenler için kutu grafiği oluşturma
plt.figure(figsize=(13,12))
for i, col in enumerate(df_number, 1):
plt.subplot(3, 4, i)
sns.boxplot(x=df_number[col])
plt.title('Boxplot for {}'.format(col))
plt.tight_layout()
plt.show()
# "residential_assets_value" ve "commercial_assets_value" değişkenlerinin histogramları ve kutu grafikleri
plt.figure(figsize=(15, 6))
# residential_assets_value histogram ve kutu grafiği
plt.subplot(1, 2, 1)
sns.histplot(df_number['residential_assets_value'], kde=True)
plt.title('Histogram for residential_assets_value')
plt.subplot(1, 2, 2)
sns.boxplot(x=df_number['residential_assets_value'])
plt.title('Boxplot for residential_assets_value')
plt.tight_layout()
plt.show()
# commercial_assets_value histogram ve kutu grafiği
plt.figure(figsize=(15, 6))
plt.subplot(1, 2, 1)
sns.histplot(df_number['commercial_assets_value'], kde=True)
plt.title('Histogram for commercial_assets_value')
plt.subplot(1, 2, 2)
sns.boxplot(x=df_number['commercial_assets_value'])
plt.title('Boxplot for commercial_assets_value')
plt.tight_layout()
plt.show()
# "residential_assets_value" ve "commercial_assets_value" değişkenlerinin histogramları ve kutu grafikleri
plt.figure(figsize=(15, 6))
# residential_assets_value histogram ve kutu grafiği
plt.subplot(1, 2, 1)
sns.histplot(df_number['bank_asset_value'], kde=True)
plt.title('Histogram for bank_asset_value')
plt.subplot(1, 2, 2)
sns.boxplot(x=df_number['bank_asset_value'])
plt.title('Boxplot for bank_asset_value')
plt.tight_layout()
plt.show()
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\ENIAC\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
import seaborn as sns
import matplotlib.pyplot as plt
# "residential_assets_value", "commercial_assets_value" ve "bank_assets_value" değişkenlerinin boxplotları
plt.figure(figsize=(18, 6))
# residential_assets_value boxplot
plt.subplot(1, 3, 1)
sns.boxplot(x=df['residential_assets_value'])
plt.title('Boxplot for residential_assets_value')
# commercial_assets_value boxplot
plt.subplot(1, 3, 2)
sns.boxplot(x=df['commercial_assets_value'])
plt.title('Boxplot for commercial_assets_value')
# bank_assets_value boxplot
plt.subplot(1, 3, 3)
sns.boxplot(x=df['bank_asset_value'])
plt.title('Boxplot for bank_asset_value')
plt.tight_layout()
plt.show()
1.residential_assets_value (ortalama değer ile aykırı değer çözme)¶
df_number_residential_assets_value = df_number["residential_assets_value"]
df_number_residential_assets_value.head()
0 2400000 1 2700000 2 7100000 3 18200000 4 12400000 Name: residential_assets_value, dtype: int64
sns.boxplot(x = df_number_residential_assets_value);
Q1 = df_number_residential_assets_value.quantile(0.25)
Q3 = df_number_residential_assets_value.quantile(0.75)
IQR = Q3-Q1
print(Q1)
print(Q3)
print(IQR)
2200000.0 11300000.0 9100000.0
alt_sinir = Q1- 1.5*IQR
ust_sinir = Q3 + 1.5*IQR
print(alt_sinir)
print(ust_sinir)
-11450000.0 24950000.0
(df_number_residential_assets_value < alt_sinir) | (df_number_residential_assets_value > ust_sinir)
0 False
1 False
2 False
3 False
4 False
...
4264 False
4265 False
4266 False
4267 False
4268 False
Name: residential_assets_value, Length: 4269, dtype: bool
aykiri_tf = (df_number_residential_assets_value < alt_sinir) | (df_number_residential_assets_value > ust_sinir)
df_number_residential_assets_value[aykiri_tf]
82 25500000 98 25900000 123 26800000 228 25500000 262 25600000 693 25400000 714 28300000 781 26300000 892 27600000 905 28700000 919 25300000 924 25800000 953 25500000 956 25300000 987 28200000 997 25200000 1002 25800000 1397 25700000 1419 25500000 1468 27000000 1591 28500000 1625 26300000 1965 28400000 1997 27000000 2185 25900000 2318 28500000 2384 26600000 2412 26200000 2586 28000000 2715 25800000 2818 25500000 2828 26900000 2922 25200000 2927 27600000 2930 25300000 2940 26100000 3119 29100000 3157 25400000 3234 28200000 3310 25100000 3498 25600000 3631 25400000 3763 26100000 3782 27500000 3860 25000000 3868 25500000 3872 25400000 3880 26200000 4027 25100000 4042 27400000 4074 27300000 4237 26200000 Name: residential_assets_value, dtype: int64
df_number_residential_assets_value[aykiri_tf].index
Index([ 82, 98, 123, 228, 262, 693, 714, 781, 892, 905, 919, 924,
953, 956, 987, 997, 1002, 1397, 1419, 1468, 1591, 1625, 1965, 1997,
2185, 2318, 2384, 2412, 2586, 2715, 2818, 2828, 2922, 2927, 2930, 2940,
3119, 3157, 3234, 3310, 3498, 3631, 3763, 3782, 3860, 3868, 3872, 3880,
4027, 4042, 4074, 4237],
dtype='int64')
df_number_residential_assets_value = pd.DataFrame(df_number_residential_assets_value)
df_number_residential_assets_value.shape
(4269, 1)
df_number_residential_assets_value.mean()
residential_assets_value 7.472617e+06 dtype: float64
df_number.loc[aykiri_tf, 'residential_assets_value'] = df_number['residential_assets_value'].mean()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\1074059456.py:1: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '7472616.537830873' has dtype incompatible with int64, please explicitly cast to a compatible dtype first. df_number.loc[aykiri_tf, 'residential_assets_value'] = df_number['residential_assets_value'].mean()
df_number
| no_of_dependents | income_annum | loan_amount | loan_term | cibil_score | residential_assets_value | commercial_assets_value | luxury_assets_value | bank_asset_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 9600000 | 29900000 | 12 | 778 | 2400000.0 | 17600000 | 22700000 | 8000000 |
| 1 | 0 | 4100000 | 12200000 | 8 | 417 | 2700000.0 | 2200000 | 8800000 | 3300000 |
| 2 | 3 | 9100000 | 29700000 | 20 | 506 | 7100000.0 | 4500000 | 33300000 | 12800000 |
| 3 | 3 | 8200000 | 30700000 | 8 | 467 | 18200000.0 | 3300000 | 23300000 | 7900000 |
| 4 | 5 | 9800000 | 24200000 | 20 | 382 | 12400000.0 | 8200000 | 29400000 | 5000000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4264 | 5 | 1000000 | 2300000 | 12 | 317 | 2800000.0 | 500000 | 3300000 | 800000 |
| 4265 | 0 | 3300000 | 11300000 | 20 | 559 | 4200000.0 | 2900000 | 11000000 | 1900000 |
| 4266 | 2 | 6500000 | 23900000 | 18 | 457 | 1200000.0 | 12400000 | 18100000 | 7300000 |
| 4267 | 1 | 4100000 | 12800000 | 8 | 780 | 8200000.0 | 700000 | 14100000 | 5800000 |
| 4268 | 1 | 9200000 | 29700000 | 10 | 607 | 17800000.0 | 11800000 | 35700000 | 12000000 |
4269 rows × 9 columns
sns.boxplot(x = df_number.residential_assets_value);
2.commercial_assets_value (ortalama değer ile aykırı değer çözme)¶
df_number_commercial_assets_value = df_number["commercial_assets_value"]
df_number_commercial_assets_value.head()
0 17600000 1 2200000 2 4500000 3 3300000 4 8200000 Name: commercial_assets_value, dtype: int64
sns.boxplot(x = df_number_commercial_assets_value);
Q1 = df_number_commercial_assets_value.quantile(0.25)
Q3 = df_number_commercial_assets_value.quantile(0.75)
IQR = Q3-Q1
print(Q1)
print(Q3)
print(IQR)
1300000.0 7600000.0 6300000.0
alt_sinir = Q1- 1.5*IQR
ust_sinir = Q3 + 1.5*IQR
print(alt_sinir)
print(ust_sinir)
-8150000.0 17050000.0
(df_number_commercial_assets_value < alt_sinir) | (df_number_commercial_assets_value > ust_sinir)
0 True
1 False
2 False
3 False
4 False
...
4264 False
4265 False
4266 False
4267 False
4268 False
Name: commercial_assets_value, Length: 4269, dtype: bool
aykiri_tf = (df_number_commercial_assets_value < alt_sinir) | (df_number_commercial_assets_value > ust_sinir)
df_number_commercial_assets_value[aykiri_tf]
0 17600000 157 18700000 231 17500000 258 19000000 323 17200000 367 18500000 554 18800000 791 17800000 895 18500000 905 17900000 1029 18300000 1131 17300000 1194 17300000 1254 18900000 1272 18400000 1304 18200000 1609 17300000 1761 19000000 1768 19400000 1812 17800000 2004 17500000 2302 17900000 2349 17400000 2914 18500000 2933 17600000 2940 19000000 2976 17700000 3000 18800000 3439 18400000 3541 17200000 3790 18400000 3827 17700000 3882 18500000 3949 17600000 4010 17600000 4120 17900000 4205 17600000 Name: commercial_assets_value, dtype: int64
df_number_commercial_assets_value[aykiri_tf].index
Index([ 0, 157, 231, 258, 323, 367, 554, 791, 895, 905, 1029, 1131,
1194, 1254, 1272, 1304, 1609, 1761, 1768, 1812, 2004, 2302, 2349, 2914,
2933, 2940, 2976, 3000, 3439, 3541, 3790, 3827, 3882, 3949, 4010, 4120,
4205],
dtype='int64')
df_number_commercial_assets_value = pd.DataFrame(df_number_commercial_assets_value)
df_number_commercial_assets_value.shape
(4269, 1)
df_number_commercial_assets_value.mean()
commercial_assets_value 4.973155e+06 dtype: float64
df_number.loc[aykiri_tf, 'commercial_assets_value'] = df_number['commercial_assets_value'].mean()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\2185203214.py:1: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '4973155.3056922' has dtype incompatible with int64, please explicitly cast to a compatible dtype first. df_number.loc[aykiri_tf, 'commercial_assets_value'] = df_number['commercial_assets_value'].mean()
df_number
| no_of_dependents | income_annum | loan_amount | loan_term | cibil_score | residential_assets_value | commercial_assets_value | luxury_assets_value | bank_asset_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 9600000 | 29900000 | 12 | 778 | 2400000.0 | 4.973155e+06 | 22700000 | 8000000 |
| 1 | 0 | 4100000 | 12200000 | 8 | 417 | 2700000.0 | 2.200000e+06 | 8800000 | 3300000 |
| 2 | 3 | 9100000 | 29700000 | 20 | 506 | 7100000.0 | 4.500000e+06 | 33300000 | 12800000 |
| 3 | 3 | 8200000 | 30700000 | 8 | 467 | 18200000.0 | 3.300000e+06 | 23300000 | 7900000 |
| 4 | 5 | 9800000 | 24200000 | 20 | 382 | 12400000.0 | 8.200000e+06 | 29400000 | 5000000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4264 | 5 | 1000000 | 2300000 | 12 | 317 | 2800000.0 | 5.000000e+05 | 3300000 | 800000 |
| 4265 | 0 | 3300000 | 11300000 | 20 | 559 | 4200000.0 | 2.900000e+06 | 11000000 | 1900000 |
| 4266 | 2 | 6500000 | 23900000 | 18 | 457 | 1200000.0 | 1.240000e+07 | 18100000 | 7300000 |
| 4267 | 1 | 4100000 | 12800000 | 8 | 780 | 8200000.0 | 7.000000e+05 | 14100000 | 5800000 |
| 4268 | 1 | 9200000 | 29700000 | 10 | 607 | 17800000.0 | 1.180000e+07 | 35700000 | 12000000 |
4269 rows × 9 columns
sns.boxplot(x = df_number.commercial_assets_value);
3.bank_asset_value (ortalama değer ile aykırı değer çözme)¶
df_number_bank_asset_value = df_number["bank_asset_value"]
df_number_bank_asset_value.head()
0 8000000 1 3300000 2 12800000 3 7900000 4 5000000 Name: bank_asset_value, dtype: int64
sns.boxplot(x = df_number_bank_asset_value);
Q1 = df_number_bank_asset_value.quantile(0.25)
Q3 = df_number_bank_asset_value.quantile(0.75)
IQR = Q3-Q1
print(Q1)
print(Q3)
print(IQR)
2300000.0 7100000.0 4800000.0
alt_sinir = Q1- 1.5*IQR
ust_sinir = Q3 + 1.5*IQR
print(alt_sinir)
print(ust_sinir)
-4900000.0 14300000.0
(df_number_bank_asset_value < alt_sinir) | (df_number_bank_asset_value > ust_sinir)
0 False
1 False
2 False
3 False
4 False
...
4264 False
4265 False
4266 False
4267 False
4268 False
Name: bank_asset_value, Length: 4269, dtype: bool
aykiri_tf = (df_number_bank_asset_value < alt_sinir) | (df_number_bank_asset_value > ust_sinir)
df_number_bank_asset_value[aykiri_tf]
200 14400000 1272 14700000 1633 14600000 1674 14600000 1805 14700000 Name: bank_asset_value, dtype: int64
df_number_bank_asset_value[aykiri_tf].index
Index([200, 1272, 1633, 1674, 1805], dtype='int64')
df_number_bank_asset_value = pd.DataFrame(df_number_bank_asset_value)
df_number_bank_asset_value.shape
(4269, 1)
df_number_bank_asset_value.mean()
bank_asset_value 4.976692e+06 dtype: float64
df_number.loc[aykiri_tf, 'bank_asset_value'] = df_number['bank_asset_value'].mean()
C:\Users\ENIAC\AppData\Local\Temp\ipykernel_17764\2284535543.py:1: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '4976692.433825252' has dtype incompatible with int64, please explicitly cast to a compatible dtype first. df_number.loc[aykiri_tf, 'bank_asset_value'] = df_number['bank_asset_value'].mean()
df_number
| no_of_dependents | income_annum | loan_amount | loan_term | cibil_score | residential_assets_value | commercial_assets_value | luxury_assets_value | bank_asset_value | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 9600000 | 29900000 | 12 | 778 | 2400000.0 | 4.973155e+06 | 22700000 | 8000000.0 |
| 1 | 0 | 4100000 | 12200000 | 8 | 417 | 2700000.0 | 2.200000e+06 | 8800000 | 3300000.0 |
| 2 | 3 | 9100000 | 29700000 | 20 | 506 | 7100000.0 | 4.500000e+06 | 33300000 | 12800000.0 |
| 3 | 3 | 8200000 | 30700000 | 8 | 467 | 18200000.0 | 3.300000e+06 | 23300000 | 7900000.0 |
| 4 | 5 | 9800000 | 24200000 | 20 | 382 | 12400000.0 | 8.200000e+06 | 29400000 | 5000000.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4264 | 5 | 1000000 | 2300000 | 12 | 317 | 2800000.0 | 5.000000e+05 | 3300000 | 800000.0 |
| 4265 | 0 | 3300000 | 11300000 | 20 | 559 | 4200000.0 | 2.900000e+06 | 11000000 | 1900000.0 |
| 4266 | 2 | 6500000 | 23900000 | 18 | 457 | 1200000.0 | 1.240000e+07 | 18100000 | 7300000.0 |
| 4267 | 1 | 4100000 | 12800000 | 8 | 780 | 8200000.0 | 7.000000e+05 | 14100000 | 5800000.0 |
| 4268 | 1 | 9200000 | 29700000 | 10 | 607 | 17800000.0 | 1.180000e+07 | 35700000 | 12000000.0 |
4269 rows × 9 columns
sns.boxplot(x = df_number.bank_asset_value);
Kategorik Değişkenlerin Sürekliye çevrilmesi¶
df_category.head()
| education | self_employed | loan_status | |
|---|---|---|---|
| 0 | Graduate | No | Approved |
| 1 | Not Graduate | Yes | Rejected |
| 2 | Graduate | No | Rejected |
| 3 | Graduate | No | Rejected |
| 4 | Not Graduate | Yes | Rejected |
df_category = pd.get_dummies(df_category, columns = ["education"], prefix = ["education"])
df_category
| self_employed | loan_status | education_ Graduate | education_ Not Graduate | |
|---|---|---|---|---|
| 0 | No | Approved | True | False |
| 1 | Yes | Rejected | False | True |
| 2 | No | Rejected | True | False |
| 3 | No | Rejected | True | False |
| 4 | Yes | Rejected | False | True |
| ... | ... | ... | ... | ... |
| 4264 | Yes | Rejected | True | False |
| 4265 | Yes | Approved | False | True |
| 4266 | No | Rejected | False | True |
| 4267 | No | Approved | False | True |
| 4268 | No | Approved | True | False |
4269 rows × 4 columns
df_category = pd.get_dummies(df_category, columns = ["self_employed"], prefix = ["self_employed"])
df_category
| loan_status | education_ Graduate | education_ Not Graduate | self_employed_ No | self_employed_ Yes | |
|---|---|---|---|---|---|
| 0 | Approved | True | False | True | False |
| 1 | Rejected | False | True | False | True |
| 2 | Rejected | True | False | True | False |
| 3 | Rejected | True | False | True | False |
| 4 | Rejected | False | True | False | True |
| ... | ... | ... | ... | ... | ... |
| 4264 | Rejected | True | False | False | True |
| 4265 | Approved | False | True | False | True |
| 4266 | Rejected | False | True | True | False |
| 4267 | Approved | False | True | True | False |
| 4268 | Approved | True | False | True | False |
4269 rows × 5 columns
df_category = pd.get_dummies(df_category, columns = ["loan_status"], prefix = ["loan_status"])
df_category
| education_ Graduate | education_ Not Graduate | self_employed_ No | self_employed_ Yes | loan_status_ Approved | loan_status_ Rejected | |
|---|---|---|---|---|---|---|
| 0 | True | False | True | False | True | False |
| 1 | False | True | False | True | False | True |
| 2 | True | False | True | False | False | True |
| 3 | True | False | True | False | False | True |
| 4 | False | True | False | True | False | True |
| ... | ... | ... | ... | ... | ... | ... |
| 4264 | True | False | False | True | False | True |
| 4265 | False | True | False | True | True | False |
| 4266 | False | True | True | False | False | True |
| 4267 | False | True | True | False | True | False |
| 4268 | True | False | True | False | True | False |
4269 rows × 6 columns
df_category
| education_ Graduate | education_ Not Graduate | self_employed_ No | self_employed_ Yes | loan_status_ Approved | loan_status_ Rejected | |
|---|---|---|---|---|---|---|
| 0 | True | False | True | False | True | False |
| 1 | False | True | False | True | False | True |
| 2 | True | False | True | False | False | True |
| 3 | True | False | True | False | False | True |
| 4 | False | True | False | True | False | True |
| ... | ... | ... | ... | ... | ... | ... |
| 4264 | True | False | False | True | False | True |
| 4265 | False | True | False | True | True | False |
| 4266 | False | True | True | False | False | True |
| 4267 | False | True | True | False | True | False |
| 4268 | True | False | True | False | True | False |
4269 rows × 6 columns
df_category = df_category.astype(int) ## TRUE FALSE HEPSİNİ SAYIYA ÇEVİRDİK.
df_category
| education_ Graduate | education_ Not Graduate | self_employed_ No | self_employed_ Yes | loan_status_ Approved | loan_status_ Rejected | |
|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 1 | 0 | 1 | 0 |
| 1 | 0 | 1 | 0 | 1 | 0 | 1 |
| 2 | 1 | 0 | 1 | 0 | 0 | 1 |
| 3 | 1 | 0 | 1 | 0 | 0 | 1 |
| 4 | 0 | 1 | 0 | 1 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... |
| 4264 | 1 | 0 | 0 | 1 | 0 | 1 |
| 4265 | 0 | 1 | 0 | 1 | 1 | 0 |
| 4266 | 0 | 1 | 1 | 0 | 0 | 1 |
| 4267 | 0 | 1 | 1 | 0 | 1 | 0 |
| 4268 | 1 | 0 | 1 | 0 | 1 | 0 |
4269 rows × 6 columns
ml_df = pd.concat([df_number,df_category],axis=1)
ml_df.rename(columns=lambda x: x.replace('_ ', '_'), inplace=True)
print(ml_df.columns)
ml_df.loan_status_Approved
Index(['no_of_dependents', 'income_annum', 'loan_amount', 'loan_term',
'cibil_score', 'residential_assets_value', 'commercial_assets_value',
'luxury_assets_value', 'bank_asset_value', 'education_Graduate',
'education_Not Graduate', 'self_employed_No', 'self_employed_Yes',
'loan_status_Approved', 'loan_status_Rejected'],
dtype='object')
0 1
1 0
2 0
3 0
4 0
..
4264 0
4265 1
4266 0
4267 1
4268 1
Name: loan_status_Approved, Length: 4269, dtype: int32
Makine Öğrenmesi¶
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import seaborn as sns
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.metrics import roc_auc_score,roc_curve
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from skompiler import skompile
from warnings import filterwarnings
filterwarnings('ignore')
Lojistik Regresyon¶
Model¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression()
#scikit-learn
loj = LogisticRegression(solver = "liblinear")
loj_model = loj.fit(X,y)
loj_model
LogisticRegression(solver='liblinear')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(solver='liblinear')
loj_model.intercept_
array([9.94034034e-10])
loj_model.coef_
array([[ 2.04994562e-09, -9.99506665e-08, 5.05309185e-08,
8.28344107e-10, 2.65294943e-06, -4.04025410e-09,
2.13252137e-08, -3.17712877e-09, 1.80822711e-08,
5.32102476e-10, 4.61931559e-10, 4.81703437e-10,
5.12330597e-10, -7.07921933e-09]])
Tahmin & Model Tuning
y_pred = log_reg.predict(X_test)
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 0.41 0.03 0.05 471
1 0.63 0.98 0.77 810
accuracy 0.63 1281
macro avg 0.52 0.50 0.41 1281
weighted avg 0.55 0.63 0.50 1281
Bu rapora göre, modelin "1" sınıfını tahmin etme yeteneği oldukça yüksek görünüyor (hassasiyet ve geri çağrı yüksek), ancak "0" sınıfını tahmin etme yeteneği oldukça zayıf (düşük hassasiyet ve geri çağrı). Bu, modelin "1" sınıfını iyi tahmin ettiğini ancak "0" sınıfını çok az tahmin ettiğini gösterir.
modele ilişkin olasılık değerleri
log_reg.predict(X)[0:10]
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
y[0:10]
0 1 1 0 2 0 3 0 4 0 5 0 6 1 7 0 8 1 9 0 Name: loan_status_Approved, dtype: int32
#modele ilişkin olasılık değerleri. ilk sutun '0' ikinci sutun '1' sınıfına ait olma olasılık
log_reg.predict_proba(X)[0:10]
array([[0.33662306, 0.66337694],
[0.44093498, 0.55906502],
[0.29769414, 0.70230586],
[0.33676859, 0.66323141],
[0.42993152, 0.57006848],
[0.40332827, 0.59667173],
[0.29282617, 0.70717383],
[0.44204698, 0.55795302],
[0.48961826, 0.51038174],
[0.4671262 , 0.5328738 ]])
esikleme isleminin el ile yapilmasi
y_probs = log_reg.predict_proba(X)
y_probs = y_probs[:,1]
y_probs[0:10]
array([0.66337694, 0.55906502, 0.70230586, 0.66323141, 0.57006848,
0.59667173, 0.70717383, 0.55795302, 0.51038174, 0.5328738 ])
#esikleme isleminin el ile yapilmasi
y_pred = [1 if i > 0.5 else 0 for i in y_probs]
y_pred[0:10]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
raporlama
confusion_matrix(y, y_pred)
array([[ 42, 1571],
[ 51, 2605]], dtype=int64)
accuracy_score(y, y_pred)
0.6200515343171703
print(classification_report(y, y_pred))
precision recall f1-score support
0 0.45 0.03 0.05 1613
1 0.62 0.98 0.76 2656
accuracy 0.62 4269
macro avg 0.54 0.50 0.41 4269
weighted avg 0.56 0.62 0.49 4269
log_reg.predict_proba(X)[:,1][0:5]
array([0.66337694, 0.55906502, 0.70230586, 0.66323141, 0.57006848])
logit_roc_auc = roc_auc_score(y, log_reg.predict(X))
fpr, tpr, thresholds = roc_curve(y, log_reg.predict_proba(X)[:,1])
plt.figure()
plt.plot(fpr, tpr, label='AUC (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Oranı')
plt.ylabel('True Positive Oranı')
plt.title('ROC')
plt.show()
print("AUC=",logit_roc_auc)
AUC= 0.5034183152324113
#test train ayırma işlemi
#verisetindeki verisetinin azlığından, sınıfların eşit dağılmamasından dolayı
#test setini 30% olarak ayarladık
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size = 0.30,
random_state = 42)
loj = LogisticRegression(solver = "liblinear")
loj_model = loj.fit(X_train,y_train)
loj_model
LogisticRegression(solver='liblinear')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(solver='liblinear')
accuracy_score(y_test, loj_model.predict(X_test))
0.6268540202966433
cross_val_score(loj_model, X_test, y_test, cv = 10)
array([0.62790698, 0.6328125 , 0.65625 , 0.6484375 , 0.609375 ,
0.6328125 , 0.6328125 , 0.6328125 , 0.71875 , 0.6171875 ])
cross_val_score(loj_model, X_test, y_test, cv = 10).mean()
0.6409156976744186
Gaussian Naive Bayes¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
nb = GaussianNB()
nb_model = nb.fit(X_train, y_train)
nb_model
GaussianNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GaussianNB()
nb_model.predict(X_test)[0:10]
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
nb_model.predict_proba(X_test)[0:10]
array([[0.49531051, 0.50468949],
[0.37832043, 0.62167957],
[0.45432182, 0.54567818],
[0.37032466, 0.62967534],
[0.31148022, 0.68851978],
[0.35897057, 0.64102943],
[0.31247462, 0.68752538],
[0.4568877 , 0.5431123 ],
[0.37310464, 0.62689536],
[0.41564146, 0.58435854]])
y_pred = nb_model.predict(X_test)
accuracy_score(y_test, y_pred)
0.7681498829039812
cross_val_score(nb_model, X_test, y_test, cv = 10).mean()
0.7658187984496123
KNN¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
knn = KNeighborsClassifier()
knn_model = knn.fit(X_train, y_train)
knn_model
KNeighborsClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsClassifier()
y_pred = knn_model.predict(X_test)
accuracy_score(y_test, y_pred)
0.5784543325526932
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 0.40 0.30 0.34 471
1 0.65 0.74 0.69 810
accuracy 0.58 1281
macro avg 0.52 0.52 0.52 1281
weighted avg 0.56 0.58 0.56 1281
knn_params = {"n_neighbors": np.arange(1,50)}
knn = KNeighborsClassifier()
knn_cv = GridSearchCV(knn, knn_params, cv=10)
knn_cv.fit(X_train, y_train)
GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
param_grid={'n_neighbors': array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
param_grid={'n_neighbors': array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])})KNeighborsClassifier()
KNeighborsClassifier()
print("En iyi skor:" + str(knn_cv.best_score_))
print("En iyi parametreler: " + str(knn_cv.best_params_))
En iyi skor:0.6157942582658077
En iyi parametreler: {'n_neighbors': 49}
knn = KNeighborsClassifier(49)
knn_tuned = knn.fit(X_train, y_train)
knn_tuned.score(X_test, y_test)
0.6104605776736924
y_pred = knn_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
0.6104605776736924
SVC¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
svm_model = SVC.fit(X_train, y_train)
svc_params = {"C": np.arange(1,10)}
svc = SVC(kernel = "linear")
svc_cv_model = GridSearchCV(svc,svc_params,
cv = 10,
n_jobs = -1,
verbose = 2 )
svc_cv_model.fit(X_train, y_train)
YSA¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler = StandardScaler()
#veri standardizasyonu
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_test_scaled[0:5]
array([[ 1.52932527, 0.1138622 , 0.49415249, 1.57740038, -1.02744105,
-0.11063326, 1.22342595, 0.0528932 , 0.72466991, 0.99266412,
-0.99266412, 1.0148356 , -1.0148356 , 1.27140173],
[-0.25151024, 0.2911526 , -0.1329306 , -0.52470577, -0.01207882,
-0.40407016, 1.10455922, 0.28186474, 0.53869149, 0.99266412,
-0.99266412, 1.0148356 , -1.0148356 , -0.78653346],
[ 0.3421016 , 1.6031015 , 0.5161554 , 0.52634731, -0.86013705,
-0.48558041, 2.6973734 , 1.44852927, 0.50769508, 0.99266412,
-0.99266412, 1.0148356 , -1.0148356 , 1.27140173],
[-0.25151024, 0.39752683, 0.90120642, -0.52470577, 0.0225358 ,
0.45993849, 1.41361272, 0.71800101, 1.31360159, 0.99266412,
-0.99266412, 1.0148356 , -1.0148356 , -0.78653346],
[ 0.3421016 , 0.25569452, -0.12192914, 0.17599628, 0.78982658,
0.73707335, -0.10788141, 0.02018298, 1.06563036, -1.00739009,
1.00739009, -0.98538127, 0.98538127, -0.78653346]])
mlpc = MLPClassifier().fit(X_train_scaled, y_train)
#modelden neler ogrenebiliriz?
dir(mlpc)
['__abstractmethods__', '__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_backprop', '_check_feature_names', '_check_n_features', '_check_solver', '_compute_loss_grad', '_estimator_type', '_fit', '_fit_lbfgs', '_fit_stochastic', '_forward_pass', '_forward_pass_fast', '_get_param_names', '_get_tags', '_init_coef', '_initialize', '_label_binarizer', '_loss_grad_lbfgs', '_more_tags', '_no_improvement_count', '_optimizer', '_parameter_constraints', '_predict', '_random_state', '_repr_html_', '_repr_html_inner', '_repr_mimebundle_', '_score', '_unpack', '_update_no_improvement_count', '_validate_data', '_validate_input', '_validate_params', 'activation', 'alpha', 'batch_size', 'best_loss_', 'best_validation_score_', 'beta_1', 'beta_2', 'classes_', 'coefs_', 'early_stopping', 'epsilon', 'fit', 'get_params', 'hidden_layer_sizes', 'intercepts_', 'learning_rate', 'learning_rate_init', 'loss', 'loss_', 'loss_curve_', 'max_fun', 'max_iter', 'momentum', 'n_features_in_', 'n_iter_', 'n_iter_no_change', 'n_layers_', 'n_outputs_', 'nesterovs_momentum', 'out_activation_', 'partial_fit', 'power_t', 'predict', 'predict_log_proba', 'predict_proba', 'random_state', 'score', 'set_params', 'shuffle', 'solver', 't_', 'tol', 'validation_fraction', 'validation_scores_', 'verbose', 'warm_start']
y_pred = mlpc.predict(X_test_scaled)
accuracy_score(y_test, y_pred)
1.0
mlpc
MLPClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MLPClassifier()
?mlpc
Type: MLPClassifier String form: MLPClassifier() File: c:\users\eniac\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py Docstring: Multi-layer Perceptron classifier. This model optimizes the log-loss function using LBFGS or stochastic gradient descent. .. versionadded:: 0.18 Parameters ---------- hidden_layer_sizes : array-like of shape(n_layers - 2,), default=(100,) The ith element represents the number of neurons in the ith hidden layer. activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu' Activation function for the hidden layer. - 'identity', no-op activation, useful to implement linear bottleneck, returns f(x) = x - 'logistic', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)). - 'tanh', the hyperbolic tan function, returns f(x) = tanh(x). - 'relu', the rectified linear unit function, returns f(x) = max(0, x) solver : {'lbfgs', 'sgd', 'adam'}, default='adam' The solver for weight optimization. - 'lbfgs' is an optimizer in the family of quasi-Newton methods. - 'sgd' refers to stochastic gradient descent. - 'adam' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba Note: The default solver 'adam' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, 'lbfgs' can converge faster and perform better. alpha : float, default=0.0001 Strength of the L2 regularization term. The L2 regularization term is divided by the sample size when added to the loss. batch_size : int, default='auto' Size of minibatches for stochastic optimizers. If the solver is 'lbfgs', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)`. learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant' Learning rate schedule for weight updates. - 'constant' is a constant learning rate given by 'learning_rate_init'. - 'invscaling' gradually decreases the learning rate at each time step 't' using an inverse scaling exponent of 'power_t'. effective_learning_rate = learning_rate_init / pow(t, power_t) - 'adaptive' keeps the learning rate constant to 'learning_rate_init' as long as training loss keeps decreasing. Each time two consecutive epochs fail to decrease training loss by at least tol, or fail to increase validation score by at least tol if 'early_stopping' is on, the current learning rate is divided by 5. Only used when ``solver='sgd'``. learning_rate_init : float, default=0.001 The initial learning rate used. It controls the step-size in updating the weights. Only used when solver='sgd' or 'adam'. power_t : float, default=0.5 The exponent for inverse scaling learning rate. It is used in updating effective learning rate when the learning_rate is set to 'invscaling'. Only used when solver='sgd'. max_iter : int, default=200 Maximum number of iterations. The solver iterates until convergence (determined by 'tol') or this number of iterations. For stochastic solvers ('sgd', 'adam'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps. shuffle : bool, default=True Whether to shuffle samples in each iteration. Only used when solver='sgd' or 'adam'. random_state : int, RandomState instance, default=None Determines random number generation for weights and bias initialization, train-test split if early stopping is used, and batch sampling when solver='sgd' or 'adam'. Pass an int for reproducible results across multiple function calls. See :term:`Glossary <random_state>`. tol : float, default=1e-4 Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to 'adaptive', convergence is considered to be reached and training stops. verbose : bool, default=False Whether to print progress messages to stdout. warm_start : bool, default=False When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary <warm_start>`. momentum : float, default=0.9 Momentum for gradient descent update. Should be between 0 and 1. Only used when solver='sgd'. nesterovs_momentum : bool, default=True Whether to use Nesterov's momentum. Only used when solver='sgd' and momentum > 0. early_stopping : bool, default=False Whether to use early stopping to terminate training when validation score is not improving. If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for ``n_iter_no_change`` consecutive epochs. The split is stratified, except in a multilabel setting. If early stopping is False, then the training stops when the training loss does not improve by more than tol for n_iter_no_change consecutive passes over the training set. Only effective when solver='sgd' or 'adam'. validation_fraction : float, default=0.1 The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True. beta_1 : float, default=0.9 Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1). Only used when solver='adam'. beta_2 : float, default=0.999 Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1). Only used when solver='adam'. epsilon : float, default=1e-8 Value for numerical stability in adam. Only used when solver='adam'. n_iter_no_change : int, default=10 Maximum number of epochs to not meet ``tol`` improvement. Only effective when solver='sgd' or 'adam'. .. versionadded:: 0.20 max_fun : int, default=15000 Only used when solver='lbfgs'. Maximum number of loss function calls. The solver iterates until convergence (determined by 'tol'), number of iterations reaches max_iter, or this number of loss function calls. Note that number of loss function calls will be greater than or equal to the number of iterations for the `MLPClassifier`. .. versionadded:: 0.22 Attributes ---------- classes_ : ndarray or list of ndarray of shape (n_classes,) Class labels for each output. loss_ : float The current loss computed with the loss function. best_loss_ : float or None The minimum loss reached by the solver throughout fitting. If `early_stopping=True`, this attribute is set ot `None`. Refer to the `best_validation_score_` fitted attribute instead. loss_curve_ : list of shape (`n_iter_`,) The ith element in the list represents the loss at the ith iteration. validation_scores_ : list of shape (`n_iter_`,) or None The score at each iteration on a held-out validation set. The score reported is the accuracy score. Only available if `early_stopping=True`, otherwise the attribute is set to `None`. best_validation_score_ : float or None The best validation score (i.e. accuracy score) that triggered the early stopping. Only available if `early_stopping=True`, otherwise the attribute is set to `None`. t_ : int The number of training samples seen by the solver during fitting. coefs_ : list of shape (n_layers - 1,) The ith element in the list represents the weight matrix corresponding to layer i. intercepts_ : list of shape (n_layers - 1,) The ith element in the list represents the bias vector corresponding to layer i + 1. n_features_in_ : int Number of features seen during :term:`fit`. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings. .. versionadded:: 1.0 n_iter_ : int The number of iterations the solver has run. n_layers_ : int Number of layers. n_outputs_ : int Number of outputs. out_activation_ : str Name of the output activation function. See Also -------- MLPRegressor : Multi-layer Perceptron regressor. BernoulliRBM : Bernoulli Restricted Boltzmann Machine (RBM). Notes ----- MLPClassifier trains iteratively since at each time step the partial derivatives of the loss function with respect to the model parameters are computed to update the parameters. It can also have a regularization term added to the loss function that shrinks model parameters to prevent overfitting. This implementation works with data represented as dense numpy arrays or sparse scipy arrays of floating point values. References ---------- Hinton, Geoffrey E. "Connectionist learning procedures." Artificial intelligence 40.1 (1989): 185-234. Glorot, Xavier, and Yoshua Bengio. "Understanding the difficulty of training deep feedforward neural networks." International Conference on Artificial Intelligence and Statistics. 2010. :arxiv:`He, Kaiming, et al (2015). "Delving deep into rectifiers: Surpassing human-level performance on imagenet classification." <1502.01852>` :arxiv:`Kingma, Diederik, and Jimmy Ba (2014) "Adam: A method for stochastic optimization." <1412.6980>` Examples -------- >>> from sklearn.neural_network import MLPClassifier >>> from sklearn.datasets import make_classification >>> from sklearn.model_selection import train_test_split >>> X, y = make_classification(n_samples=100, random_state=1) >>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, ... random_state=1) >>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train) >>> clf.predict_proba(X_test[:1]) array([[0.038..., 0.961...]]) >>> clf.predict(X_test[:5, :]) array([1, 0, 1, 0, 1]) >>> clf.score(X_test, y_test) 0.8...
mlpc_params = {"alpha": [0.1, 0.01, 0.02, 0.005, 0.0001,0.00001],
"hidden_layer_sizes": [(10,10,10),
(100,100,100),
(100,100),
(3,5),
(5, 3)],
"solver" : ["lbfgs","adam","sgd"],
"activation": ["relu","logistic"]}
mlpc = MLPClassifier()
mlpc_cv_model = GridSearchCV(mlpc, mlpc_params,
cv = 10,
n_jobs = -1,
verbose = 2)
mlpc_cv_model.fit(X_train_scaled, y_train)
Fitting 10 folds for each of 180 candidates, totalling 1800 fits
GridSearchCV(cv=10, estimator=MLPClassifier(), n_jobs=-1,
param_grid={'activation': ['relu', 'logistic'],
'alpha': [0.1, 0.01, 0.02, 0.005, 0.0001, 1e-05],
'hidden_layer_sizes': [(10, 10, 10), (100, 100, 100),
(100, 100), (3, 5), (5, 3)],
'solver': ['lbfgs', 'adam', 'sgd']},
verbose=2)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=MLPClassifier(), n_jobs=-1,
param_grid={'activation': ['relu', 'logistic'],
'alpha': [0.1, 0.01, 0.02, 0.005, 0.0001, 1e-05],
'hidden_layer_sizes': [(10, 10, 10), (100, 100, 100),
(100, 100), (3, 5), (5, 3)],
'solver': ['lbfgs', 'adam', 'sgd']},
verbose=2)MLPClassifier()
MLPClassifier()
print("En iyi parametreler: " + str(mlpc_cv_model.best_params_))
En iyi parametreler: {'activation': 'relu', 'alpha': 0.1, 'hidden_layer_sizes': (10, 10, 10), 'solver': 'lbfgs'}
mlpc_tuned = MLPClassifier(activation = "relu",
alpha = 0.1,
hidden_layer_sizes = (10, 10, 10),
solver = "lbfgs")
mlpc_tuned.fit(X_train_scaled, y_train)
MLPClassifier(alpha=0.1, hidden_layer_sizes=(10, 10, 10), solver='lbfgs')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MLPClassifier(alpha=0.1, hidden_layer_sizes=(10, 10, 10), solver='lbfgs')
y_pred = mlpc_tuned.predict(X_test_scaled)
accuracy_score(y_test, y_pred)
1.0
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 1.00 1.00 1.00 471
1 1.00 1.00 1.00 810
accuracy 1.00 1281
macro avg 1.00 1.00 1.00 1281
weighted avg 1.00 1.00 1.00 1281
Karar Ağaçları¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
cart = DecisionTreeClassifier()
cart_model = cart.fit(X_train, y_train)
cart_model
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
import astor
#modelin python koduna donusturulmesi
print(skompile(cart_model.predict).to("python/code"))
(1 if x[13] <= 0.5 else 0)
y_pred = cart_model.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
cart_model
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier()
?cart_model
Type: DecisionTreeClassifier String form: DecisionTreeClassifier() File: c:\users\eniac\anaconda3\lib\site-packages\sklearn\tree\_classes.py Docstring: A decision tree classifier. Read more in the :ref:`User Guide <tree>`. Parameters ---------- criterion : {"gini", "entropy", "log_loss"}, default="gini" The function to measure the quality of a split. Supported criteria are "gini" for the Gini impurity and "log_loss" and "entropy" both for the Shannon information gain, see :ref:`tree_mathematical_formulation`. splitter : {"best", "random"}, default="best" The strategy used to choose the split at each node. Supported strategies are "best" to choose the best split and "random" to choose the best random split. max_depth : int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples. min_samples_split : int or float, default=2 The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for fractions. min_samples_leaf : int or float, default=1 The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for fractions. min_weight_fraction_leaf : float, default=0.0 The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided. max_features : int, float or {"auto", "sqrt", "log2"}, default=None The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and `max(1, int(max_features * n_features_in_))` features are considered at each split. - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. .. deprecated:: 1.1 The `"auto"` option was deprecated in 1.1 and will be removed in 1.3. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. random_state : int, RandomState instance or None, default=None Controls the randomness of the estimator. The features are always randomly permuted at each split, even if ``splitter`` is set to ``"best"``. When ``max_features < n_features``, the algorithm will select ``max_features`` at random at each split before finding the best split among them. But the best found split may vary across different runs, even if ``max_features=n_features``. That is the case, if the improvement of the criterion is identical for several splits and one split has to be selected at random. To obtain a deterministic behaviour during fitting, ``random_state`` has to be fixed to an integer. See :term:`Glossary <random_state>` for details. max_leaf_nodes : int, default=None Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes. min_impurity_decrease : float, default=0.0 A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19 class_weight : dict, list of dict or "balanced", default=None Weights associated with classes in the form ``{class_label: weight}``. If None, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. Note that for multioutput (including multilabel) weights should be defined for each class of every column in its own dict. For example, for four-class multilabel classification weights should be [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of [{1:1}, {2:5}, {3:1}, {4:1}]. The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. ccp_alpha : non-negative float, default=0.0 Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. See :ref:`minimal_cost_complexity_pruning` for details. .. versionadded:: 0.22 Attributes ---------- classes_ : ndarray of shape (n_classes,) or list of ndarray The classes labels (single output problem), or a list of arrays of class labels (multi-output problem). feature_importances_ : ndarray of shape (n_features,) The impurity-based feature importances. The higher, the more important the feature. The importance of a feature is computed as the (normalized) total reduction of the criterion brought by that feature. It is also known as the Gini importance [4]_. Warning: impurity-based feature importances can be misleading for high cardinality features (many unique values). See :func:`sklearn.inspection.permutation_importance` as an alternative. max_features_ : int The inferred value of max_features. n_classes_ : int or list of int The number of classes (for single output problems), or a list containing the number of classes for each output (for multi-output problems). n_features_in_ : int Number of features seen during :term:`fit`. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings. .. versionadded:: 1.0 n_outputs_ : int The number of outputs when ``fit`` is performed. tree_ : Tree instance The underlying Tree object. Please refer to ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` for basic usage of these attributes. See Also -------- DecisionTreeRegressor : A decision tree regressor. Notes ----- The default values for the parameters controlling the size of the trees (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and unpruned trees which can potentially be very large on some data sets. To reduce memory consumption, the complexity and size of the trees should be controlled by setting those parameter values. The :meth:`predict` method operates using the :func:`numpy.argmax` function on the outputs of :meth:`predict_proba`. This means that in case the highest predicted probabilities are tied, the classifier will predict the tied class with the lowest index in :term:`classes_`. References ---------- .. [1] https://en.wikipedia.org/wiki/Decision_tree_learning .. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, "Classification and Regression Trees", Wadsworth, Belmont, CA, 1984. .. [3] T. Hastie, R. Tibshirani and J. Friedman. "Elements of Statistical Learning", Springer, 2009. .. [4] L. Breiman, and A. Cutler, "Random Forests", https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm Examples -------- >>> from sklearn.datasets import load_iris >>> from sklearn.model_selection import cross_val_score >>> from sklearn.tree import DecisionTreeClassifier >>> clf = DecisionTreeClassifier(random_state=0) >>> iris = load_iris() >>> cross_val_score(clf, iris.data, iris.target, cv=10) ... # doctest: +SKIP ... array([ 1. , 0.93..., 0.86..., 0.93..., 0.93..., 0.93..., 0.93..., 1. , 0.93..., 1. ])
cart_grid = {"max_depth": range(1,10),
"min_samples_split" : list(range(2,50)) }
cart = tree.DecisionTreeClassifier()
cart_cv = GridSearchCV(cart, cart_grid, cv = 10, n_jobs = -1, verbose = 2)
cart_cv_model = cart_cv.fit(X_train, y_train)
Fitting 10 folds for each of 432 candidates, totalling 4320 fits
print("En iyi parametreler: " + str(cart_cv_model.best_params_))
En iyi parametreler: {'max_depth': 1, 'min_samples_split': 2}
cart = tree.DecisionTreeClassifier(max_depth = 1, min_samples_split = 2)
cart_tuned = cart.fit(X_train, y_train)
y_pred = cart_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
Random Forests¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
rf_model = RandomForestClassifier().fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
rf_params = {"max_depth": [2,5,8,10],
"max_features": [2,5,8],
"n_estimators": [10,500,1000],
"min_samples_split": [2,5,10]}
rf_model = RandomForestClassifier()
rf_cv_model = GridSearchCV(rf_model,
rf_params,
cv = 10,
n_jobs = -1,
verbose = 2)
rf_cv_model.fit(X_train, y_train)
Fitting 10 folds for each of 108 candidates, totalling 1080 fits
GridSearchCV(cv=10, estimator=RandomForestClassifier(), n_jobs=-1,
param_grid={'max_depth': [2, 5, 8, 10], 'max_features': [2, 5, 8],
'min_samples_split': [2, 5, 10],
'n_estimators': [10, 500, 1000]},
verbose=2)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=RandomForestClassifier(), n_jobs=-1,
param_grid={'max_depth': [2, 5, 8, 10], 'max_features': [2, 5, 8],
'min_samples_split': [2, 5, 10],
'n_estimators': [10, 500, 1000]},
verbose=2)RandomForestClassifier()
RandomForestClassifier()
print("En iyi parametreler: " + str(rf_cv_model.best_params_))
En iyi parametreler: {'max_depth': 2, 'max_features': 5, 'min_samples_split': 2, 'n_estimators': 10}
rf_tuned = RandomForestClassifier(max_depth = 2,
max_features = 5,
min_samples_split = 2,
n_estimators = 10)
rf_tuned.fit(X_train, y_train)
RandomForestClassifier(max_depth=2, max_features=5, n_estimators=10)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(max_depth=2, max_features=5, n_estimators=10)
y_pred = rf_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
0.9976580796252927
Importance = pd.DataFrame({"Importance": rf_tuned.feature_importances_*100},
index = X_train.columns)
Importance.sort_values(by = "Importance",
axis = 0,
ascending = True).plot(kind ="barh", color = "r")
plt.xlabel("Değişken Önem Düzeyleri")
Text(0.5, 0, 'Değişken Önem Düzeyleri')
Gradient Boosting Machines (GBM)¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
gbm_model = GradientBoostingClassifier().fit(X_train, y_train)
y_pred = gbm_model.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
?gbm_model
gbm_params = {"learning_rate" : [0.001, 0.01, 0.1, 0.05],
"n_estimators": [100,500,100],
"max_depth": [3,5,10],
"min_samples_split": [2,5,10]}
gbm = GradientBoostingClassifier()
gbm_cv = GridSearchCV(gbm, gbm_params, cv = 10, n_jobs = -1, verbose = 2)
gbm_cv.fit(X_train, y_train)
Fitting 10 folds for each of 108 candidates, totalling 1080 fits
GridSearchCV(cv=10, estimator=GradientBoostingClassifier(), n_jobs=-1,
param_grid={'learning_rate': [0.001, 0.01, 0.1, 0.05],
'max_depth': [3, 5, 10],
'min_samples_split': [2, 5, 10],
'n_estimators': [100, 500, 100]},
verbose=2)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=GradientBoostingClassifier(), n_jobs=-1,
param_grid={'learning_rate': [0.001, 0.01, 0.1, 0.05],
'max_depth': [3, 5, 10],
'min_samples_split': [2, 5, 10],
'n_estimators': [100, 500, 100]},
verbose=2)GradientBoostingClassifier()
GradientBoostingClassifier()
print("En iyi parametreler: " + str(gbm_cv.best_params_))
En iyi parametreler: {'learning_rate': 0.001, 'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 500}
gbm = GradientBoostingClassifier(learning_rate = 0.001,
max_depth = 3,
min_samples_split = 2,
n_estimators = 500)
gbm_tuned = gbm.fit(X_train,y_train)
y_pred = gbm_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
XGBoost (eXtreme Gradient Boosting)¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
xgb_model = XGBClassifier().fit(X_train, y_train)
xgb_model
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None, n_jobs=None,
num_parallel_tree=None, random_state=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None, n_jobs=None,
num_parallel_tree=None, random_state=None, ...)y_pred = xgb_model.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
xgb_params = {
'n_estimators': [100, 500, 1000, 2000],
'subsample': [0.6, 0.8, 1.0],
'max_depth': [3, 4, 5,6],
'learning_rate': [0.1,0.01,0.02,0.05],
"min_samples_split": [2,5,10]}
xgb = XGBClassifier()
xgb_cv_model = GridSearchCV(xgb, xgb_params, cv = 10, n_jobs = -1, verbose = 2)
xgb_cv_model.fit(X_train, y_train)
Fitting 10 folds for each of 576 candidates, totalling 5760 fits
GridSearchCV(cv=10,
estimator=XGBClassifier(base_score=None, booster=None,
callbacks=None, colsample_bylevel=None,
colsample_bynode=None,
colsample_bytree=None, device=None,
early_stopping_rounds=None,
enable_categorical=False, eval_metric=None,
feature_types=None, gamma=None,
grow_policy=None, importance_type=None,
interaction_constraints=None,
learning_rate=None...
max_leaves=None, min_child_weight=None,
missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None,
n_jobs=None, num_parallel_tree=None,
random_state=None, ...),
n_jobs=-1,
param_grid={'learning_rate': [0.1, 0.01, 0.02, 0.05],
'max_depth': [3, 4, 5, 6],
'min_samples_split': [2, 5, 10],
'n_estimators': [100, 500, 1000, 2000],
'subsample': [0.6, 0.8, 1.0]},
verbose=2)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10,
estimator=XGBClassifier(base_score=None, booster=None,
callbacks=None, colsample_bylevel=None,
colsample_bynode=None,
colsample_bytree=None, device=None,
early_stopping_rounds=None,
enable_categorical=False, eval_metric=None,
feature_types=None, gamma=None,
grow_policy=None, importance_type=None,
interaction_constraints=None,
learning_rate=None...
max_leaves=None, min_child_weight=None,
missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None,
n_jobs=None, num_parallel_tree=None,
random_state=None, ...),
n_jobs=-1,
param_grid={'learning_rate': [0.1, 0.01, 0.02, 0.05],
'max_depth': [3, 4, 5, 6],
'min_samples_split': [2, 5, 10],
'n_estimators': [100, 500, 1000, 2000],
'subsample': [0.6, 0.8, 1.0]},
verbose=2)XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None, n_jobs=None,
num_parallel_tree=None, random_state=None, ...)XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None, n_jobs=None,
num_parallel_tree=None, random_state=None, ...)xgb_cv_model.best_params_
{'learning_rate': 0.1,
'max_depth': 3,
'min_samples_split': 2,
'n_estimators': 100,
'subsample': 0.6}
xgb = XGBClassifier(learning_rate = 0.1,
max_depth = 3,
min_samples_split = 2,
n_estimators = 100,
subsample = 0.6)
xgb_tuned = xgb.fit(X_train,y_train)
y_pred = xgb_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
Light GBM¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
lgbm_model = LGBMClassifier().fit(X_train, y_train)
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines [LightGBM] [Info] Number of positive: 1846, number of negative: 1142 [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000108 seconds. You can set `force_row_wise=true` to remove the overhead. And if memory is not enough, you can set `force_col_wise=true`. [LightGBM] [Info] Total Bins 1412 [LightGBM] [Info] Number of data points in the train set: 2988, number of used features: 14 [LightGBM] [Info] [binary:BoostFromScore]: pavg=0.617805 -> initscore=0.480240 [LightGBM] [Info] Start training from score 0.480240 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
y_pred = lgbm_model.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
lgbm_params = {
'n_estimators': [100, 500, 1000, 2000],
'subsample': [0.6, 0.8, 1.0],
'max_depth': [3, 4, 5,6],
'learning_rate': [0.1,0.01,0.02,0.05],
"min_child_samples": [5,10,20]}
lgbm = LGBMClassifier()
lgbm_cv_model = GridSearchCV(lgbm, lgbm_params,
cv = 10,
n_jobs = -1,
verbose = 2)
lgbm_cv_model.fit(X_train, y_train)
Fitting 10 folds for each of 576 candidates, totalling 5760 fits [LightGBM] [Warning] Found whitespace in feature_names, replace with underlines [LightGBM] [Info] Number of positive: 1846, number of negative: 1142 [LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000206 seconds. You can set `force_col_wise=true` to remove the overhead. [LightGBM] [Info] Total Bins 1412 [LightGBM] [Info] Number of data points in the train set: 2988, number of used features: 14 [LightGBM] [Info] [binary:BoostFromScore]: pavg=0.617805 -> initscore=0.480240 [LightGBM] [Info] Start training from score 0.480240 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
GridSearchCV(cv=10, estimator=LGBMClassifier(), n_jobs=-1,
param_grid={'learning_rate': [0.1, 0.01, 0.02, 0.05],
'max_depth': [3, 4, 5, 6],
'min_child_samples': [5, 10, 20],
'n_estimators': [100, 500, 1000, 2000],
'subsample': [0.6, 0.8, 1.0]},
verbose=2)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GridSearchCV(cv=10, estimator=LGBMClassifier(), n_jobs=-1,
param_grid={'learning_rate': [0.1, 0.01, 0.02, 0.05],
'max_depth': [3, 4, 5, 6],
'min_child_samples': [5, 10, 20],
'n_estimators': [100, 500, 1000, 2000],
'subsample': [0.6, 0.8, 1.0]},
verbose=2)LGBMClassifier()
LGBMClassifier()
lgbm_cv_model.best_params_
{'learning_rate': 0.1,
'max_depth': 3,
'min_child_samples': 5,
'n_estimators': 100,
'subsample': 0.6}
lgbm = LGBMClassifier(learning_rate = 0.01,
max_depth = 3,
subsample = 0.6,
n_estimators = 100,
min_child_samples = 5)
lgbm_tuned = lgbm.fit(X_train,y_train)
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines [LightGBM] [Info] Number of positive: 1846, number of negative: 1142 [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000067 seconds. You can set `force_row_wise=true` to remove the overhead. And if memory is not enough, you can set `force_col_wise=true`. [LightGBM] [Info] Total Bins 1412 [LightGBM] [Info] Number of data points in the train set: 2988, number of used features: 14 [LightGBM] [Info] [binary:BoostFromScore]: pavg=0.617805 -> initscore=0.480240 [LightGBM] [Info] Start training from score 0.480240 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf
y_pred = lgbm_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
CatBoost¶
X = ml_df.drop('loan_status_Approved', axis=1) # Bağımsız değişkenler
y = ml_df['loan_status_Approved'] # Bağımlı değişken
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
cat_model = CatBoostClassifier().fit(X_train, y_train)
Learning rate set to 0.016441 0: learn: 0.6464533 total: 140ms remaining: 2m 19s 1: learn: 0.6023797 total: 142ms remaining: 1m 10s 2: learn: 0.5566657 total: 144ms remaining: 47.7s 3: learn: 0.5171310 total: 146ms remaining: 36.2s 4: learn: 0.4850220 total: 147ms remaining: 29.3s 5: learn: 0.4516708 total: 149ms remaining: 24.8s 6: learn: 0.4181626 total: 151ms remaining: 21.5s 7: learn: 0.3901921 total: 153ms remaining: 19s 8: learn: 0.3623896 total: 155ms remaining: 17.1s 9: learn: 0.3346459 total: 157ms remaining: 15.6s 10: learn: 0.3106111 total: 159ms remaining: 14.3s 11: learn: 0.2914515 total: 161ms remaining: 13.2s 12: learn: 0.2737354 total: 163ms remaining: 12.4s 13: learn: 0.2545953 total: 165ms remaining: 11.6s 14: learn: 0.2392659 total: 166ms remaining: 10.9s 15: learn: 0.2239485 total: 168ms remaining: 10.3s 16: learn: 0.2080996 total: 170ms remaining: 9.84s 17: learn: 0.1954131 total: 172ms remaining: 9.39s 18: learn: 0.1831998 total: 174ms remaining: 8.98s 19: learn: 0.1714466 total: 176ms remaining: 8.61s 20: learn: 0.1575300 total: 177ms remaining: 8.25s 21: learn: 0.1482057 total: 179ms remaining: 7.95s 22: learn: 0.1389805 total: 181ms remaining: 7.68s 23: learn: 0.1296014 total: 182ms remaining: 7.41s 24: learn: 0.1210415 total: 184ms remaining: 7.18s 25: learn: 0.1141888 total: 186ms remaining: 6.96s 26: learn: 0.1068103 total: 188ms remaining: 6.76s 27: learn: 0.1008792 total: 189ms remaining: 6.57s 28: learn: 0.0950560 total: 191ms remaining: 6.4s 29: learn: 0.0889986 total: 193ms remaining: 6.25s 30: learn: 0.0842616 total: 195ms remaining: 6.09s 31: learn: 0.0792629 total: 197ms remaining: 5.97s 32: learn: 0.0747382 total: 199ms remaining: 5.84s 33: learn: 0.0712969 total: 201ms remaining: 5.72s 34: learn: 0.0669940 total: 203ms remaining: 5.6s 35: learn: 0.0628704 total: 205ms remaining: 5.49s 36: learn: 0.0595447 total: 207ms remaining: 5.39s 37: learn: 0.0564644 total: 209ms remaining: 5.29s 38: learn: 0.0538554 total: 211ms remaining: 5.2s 39: learn: 0.0513948 total: 213ms remaining: 5.11s 40: learn: 0.0490338 total: 215ms remaining: 5.02s 41: learn: 0.0466817 total: 217ms remaining: 4.94s 42: learn: 0.0447170 total: 218ms remaining: 4.86s 43: learn: 0.0426239 total: 220ms remaining: 4.79s 44: learn: 0.0402544 total: 222ms remaining: 4.72s 45: learn: 0.0380981 total: 224ms remaining: 4.65s 46: learn: 0.0361809 total: 226ms remaining: 4.58s 47: learn: 0.0342099 total: 228ms remaining: 4.52s 48: learn: 0.0324962 total: 230ms remaining: 4.46s 49: learn: 0.0310525 total: 231ms remaining: 4.39s 50: learn: 0.0297677 total: 233ms remaining: 4.34s 51: learn: 0.0283897 total: 235ms remaining: 4.28s 52: learn: 0.0273426 total: 237ms remaining: 4.23s 53: learn: 0.0261094 total: 239ms remaining: 4.18s 54: learn: 0.0249256 total: 241ms remaining: 4.13s 55: learn: 0.0240106 total: 242ms remaining: 4.09s 56: learn: 0.0228148 total: 244ms remaining: 4.04s 57: learn: 0.0218644 total: 246ms remaining: 4s 58: learn: 0.0210465 total: 248ms remaining: 3.95s 59: learn: 0.0203860 total: 250ms remaining: 3.91s 60: learn: 0.0190205 total: 251ms remaining: 3.86s 61: learn: 0.0181295 total: 252ms remaining: 3.82s 62: learn: 0.0174370 total: 254ms remaining: 3.78s 63: learn: 0.0168389 total: 256ms remaining: 3.74s 64: learn: 0.0161580 total: 258ms remaining: 3.71s 65: learn: 0.0153754 total: 259ms remaining: 3.67s 66: learn: 0.0147726 total: 261ms remaining: 3.64s 67: learn: 0.0141584 total: 263ms remaining: 3.6s 68: learn: 0.0136554 total: 265ms remaining: 3.57s 69: learn: 0.0132593 total: 267ms remaining: 3.54s 70: learn: 0.0128097 total: 269ms remaining: 3.51s 71: learn: 0.0120275 total: 270ms remaining: 3.47s 72: learn: 0.0116394 total: 271ms remaining: 3.45s 73: learn: 0.0113393 total: 273ms remaining: 3.42s 74: learn: 0.0110250 total: 275ms remaining: 3.39s 75: learn: 0.0107296 total: 277ms remaining: 3.36s 76: learn: 0.0103957 total: 279ms remaining: 3.34s 77: learn: 0.0100990 total: 280ms remaining: 3.31s 78: learn: 0.0097881 total: 282ms remaining: 3.29s 79: learn: 0.0095039 total: 284ms remaining: 3.27s 80: learn: 0.0092724 total: 286ms remaining: 3.24s 81: learn: 0.0090495 total: 288ms remaining: 3.22s 82: learn: 0.0088212 total: 290ms remaining: 3.2s 83: learn: 0.0086016 total: 291ms remaining: 3.18s 84: learn: 0.0083273 total: 293ms remaining: 3.16s 85: learn: 0.0081304 total: 295ms remaining: 3.14s 86: learn: 0.0079240 total: 297ms remaining: 3.12s 87: learn: 0.0077645 total: 299ms remaining: 3.1s 88: learn: 0.0075987 total: 301ms remaining: 3.08s 89: learn: 0.0074203 total: 303ms remaining: 3.06s 90: learn: 0.0072050 total: 304ms remaining: 3.04s 91: learn: 0.0070380 total: 306ms remaining: 3.02s 92: learn: 0.0068800 total: 308ms remaining: 3s 93: learn: 0.0066866 total: 310ms remaining: 2.99s 94: learn: 0.0065262 total: 312ms remaining: 2.97s 95: learn: 0.0063769 total: 314ms remaining: 2.95s 96: learn: 0.0062348 total: 315ms remaining: 2.94s 97: learn: 0.0060803 total: 317ms remaining: 2.92s 98: learn: 0.0059428 total: 319ms remaining: 2.9s 99: learn: 0.0058191 total: 321ms remaining: 2.89s 100: learn: 0.0056962 total: 323ms remaining: 2.87s 101: learn: 0.0055848 total: 325ms remaining: 2.86s 102: learn: 0.0054635 total: 326ms remaining: 2.84s 103: learn: 0.0053628 total: 328ms remaining: 2.83s 104: learn: 0.0052481 total: 330ms remaining: 2.81s 105: learn: 0.0051218 total: 332ms remaining: 2.8s 106: learn: 0.0049842 total: 334ms remaining: 2.79s 107: learn: 0.0048950 total: 336ms remaining: 2.77s 108: learn: 0.0048036 total: 338ms remaining: 2.76s 109: learn: 0.0047307 total: 339ms remaining: 2.75s 110: learn: 0.0046274 total: 341ms remaining: 2.73s 111: learn: 0.0045438 total: 343ms remaining: 2.72s 112: learn: 0.0044424 total: 345ms remaining: 2.71s 113: learn: 0.0043532 total: 347ms remaining: 2.69s 114: learn: 0.0042806 total: 349ms remaining: 2.68s 115: learn: 0.0042128 total: 350ms remaining: 2.67s 116: learn: 0.0041446 total: 352ms remaining: 2.66s 117: learn: 0.0040765 total: 354ms remaining: 2.65s 118: learn: 0.0040027 total: 356ms remaining: 2.63s 119: learn: 0.0039109 total: 358ms remaining: 2.62s 120: learn: 0.0038422 total: 360ms remaining: 2.61s 121: learn: 0.0037853 total: 362ms remaining: 2.6s 122: learn: 0.0037125 total: 364ms remaining: 2.59s 123: learn: 0.0036305 total: 365ms remaining: 2.58s 124: learn: 0.0035739 total: 367ms remaining: 2.57s 125: learn: 0.0034915 total: 369ms remaining: 2.56s 126: learn: 0.0034207 total: 371ms remaining: 2.55s 127: learn: 0.0033473 total: 372ms remaining: 2.54s 128: learn: 0.0033113 total: 374ms remaining: 2.52s 129: learn: 0.0032567 total: 376ms remaining: 2.52s 130: learn: 0.0032004 total: 378ms remaining: 2.5s 131: learn: 0.0031405 total: 379ms remaining: 2.5s 132: learn: 0.0030755 total: 381ms remaining: 2.48s 133: learn: 0.0030243 total: 383ms remaining: 2.48s 134: learn: 0.0029628 total: 385ms remaining: 2.47s 135: learn: 0.0029228 total: 387ms remaining: 2.46s 136: learn: 0.0028774 total: 389ms remaining: 2.45s 137: learn: 0.0028360 total: 391ms remaining: 2.44s 138: learn: 0.0027906 total: 392ms remaining: 2.43s 139: learn: 0.0027589 total: 394ms remaining: 2.42s 140: learn: 0.0027129 total: 396ms remaining: 2.41s 141: learn: 0.0026700 total: 398ms remaining: 2.4s 142: learn: 0.0026261 total: 400ms remaining: 2.39s 143: learn: 0.0025782 total: 402ms remaining: 2.39s 144: learn: 0.0025366 total: 404ms remaining: 2.38s 145: learn: 0.0024990 total: 406ms remaining: 2.37s 146: learn: 0.0024730 total: 408ms remaining: 2.37s 147: learn: 0.0024353 total: 410ms remaining: 2.36s 148: learn: 0.0024029 total: 411ms remaining: 2.35s 149: learn: 0.0023637 total: 413ms remaining: 2.34s 150: learn: 0.0023300 total: 416ms remaining: 2.34s 151: learn: 0.0023107 total: 418ms remaining: 2.33s 152: learn: 0.0022769 total: 419ms remaining: 2.32s 153: learn: 0.0022531 total: 422ms remaining: 2.31s 154: learn: 0.0022287 total: 424ms remaining: 2.31s 155: learn: 0.0021999 total: 426ms remaining: 2.3s 156: learn: 0.0021744 total: 428ms remaining: 2.29s 157: learn: 0.0021601 total: 429ms remaining: 2.29s 158: learn: 0.0021286 total: 431ms remaining: 2.28s 159: learn: 0.0021036 total: 433ms remaining: 2.27s 160: learn: 0.0020765 total: 435ms remaining: 2.27s 161: learn: 0.0020371 total: 437ms remaining: 2.26s 162: learn: 0.0020094 total: 439ms remaining: 2.25s 163: learn: 0.0019701 total: 440ms remaining: 2.24s 164: learn: 0.0019549 total: 442ms remaining: 2.24s 165: learn: 0.0019282 total: 444ms remaining: 2.23s 166: learn: 0.0019086 total: 446ms remaining: 2.22s 167: learn: 0.0018906 total: 448ms remaining: 2.22s 168: learn: 0.0018377 total: 449ms remaining: 2.21s 169: learn: 0.0018188 total: 451ms remaining: 2.2s 170: learn: 0.0018037 total: 453ms remaining: 2.19s 171: learn: 0.0017876 total: 454ms remaining: 2.19s 172: learn: 0.0017668 total: 456ms remaining: 2.18s 173: learn: 0.0017487 total: 458ms remaining: 2.17s 174: learn: 0.0017295 total: 460ms remaining: 2.17s 175: learn: 0.0017131 total: 462ms remaining: 2.16s 176: learn: 0.0016937 total: 464ms remaining: 2.16s 177: learn: 0.0016736 total: 466ms remaining: 2.15s 178: learn: 0.0016552 total: 467ms remaining: 2.14s 179: learn: 0.0016415 total: 469ms remaining: 2.14s 180: learn: 0.0016151 total: 471ms remaining: 2.13s 181: learn: 0.0015990 total: 473ms remaining: 2.13s 182: learn: 0.0015800 total: 475ms remaining: 2.12s 183: learn: 0.0015635 total: 477ms remaining: 2.11s 184: learn: 0.0015469 total: 479ms remaining: 2.11s 185: learn: 0.0015336 total: 480ms remaining: 2.1s 186: learn: 0.0015186 total: 482ms remaining: 2.1s 187: learn: 0.0015026 total: 484ms remaining: 2.09s 188: learn: 0.0014860 total: 486ms remaining: 2.08s 189: learn: 0.0014738 total: 488ms remaining: 2.08s 190: learn: 0.0014582 total: 490ms remaining: 2.07s 191: learn: 0.0014416 total: 492ms remaining: 2.07s 192: learn: 0.0014251 total: 494ms remaining: 2.06s 193: learn: 0.0014124 total: 496ms remaining: 2.06s 194: learn: 0.0014002 total: 497ms remaining: 2.05s 195: learn: 0.0013874 total: 499ms remaining: 2.05s 196: learn: 0.0013773 total: 501ms remaining: 2.04s 197: learn: 0.0013656 total: 503ms remaining: 2.04s 198: learn: 0.0013540 total: 505ms remaining: 2.03s 199: learn: 0.0013433 total: 506ms remaining: 2.02s 200: learn: 0.0013311 total: 508ms remaining: 2.02s 201: learn: 0.0013179 total: 510ms remaining: 2.01s 202: learn: 0.0013051 total: 512ms remaining: 2.01s 203: learn: 0.0012973 total: 514ms remaining: 2s 204: learn: 0.0012872 total: 515ms remaining: 2s 205: learn: 0.0012720 total: 517ms remaining: 1.99s 206: learn: 0.0012602 total: 519ms remaining: 1.99s 207: learn: 0.0012497 total: 521ms remaining: 1.98s 208: learn: 0.0012349 total: 523ms remaining: 1.98s 209: learn: 0.0012213 total: 525ms remaining: 1.97s 210: learn: 0.0012147 total: 526ms remaining: 1.97s 211: learn: 0.0012052 total: 528ms remaining: 1.96s 212: learn: 0.0011942 total: 530ms remaining: 1.96s 213: learn: 0.0011858 total: 532ms remaining: 1.95s 214: learn: 0.0011772 total: 534ms remaining: 1.95s 215: learn: 0.0011701 total: 536ms remaining: 1.94s 216: learn: 0.0011615 total: 538ms remaining: 1.94s 217: learn: 0.0011517 total: 540ms remaining: 1.94s 218: learn: 0.0011435 total: 541ms remaining: 1.93s 219: learn: 0.0011332 total: 543ms remaining: 1.93s 220: learn: 0.0011232 total: 545ms remaining: 1.92s 221: learn: 0.0011139 total: 547ms remaining: 1.92s 222: learn: 0.0011025 total: 549ms remaining: 1.91s 223: learn: 0.0010941 total: 551ms remaining: 1.91s 224: learn: 0.0010841 total: 552ms remaining: 1.9s 225: learn: 0.0010776 total: 554ms remaining: 1.9s 226: learn: 0.0010776 total: 556ms remaining: 1.89s 227: learn: 0.0010776 total: 557ms remaining: 1.89s 228: learn: 0.0010696 total: 559ms remaining: 1.88s 229: learn: 0.0010624 total: 561ms remaining: 1.88s 230: learn: 0.0010555 total: 563ms remaining: 1.87s 231: learn: 0.0010488 total: 565ms remaining: 1.87s 232: learn: 0.0010400 total: 566ms remaining: 1.86s 233: learn: 0.0010400 total: 568ms remaining: 1.86s 234: learn: 0.0010309 total: 570ms remaining: 1.85s 235: learn: 0.0010309 total: 571ms remaining: 1.85s 236: learn: 0.0010229 total: 573ms remaining: 1.84s 237: learn: 0.0010229 total: 575ms remaining: 1.84s 238: learn: 0.0010164 total: 577ms remaining: 1.84s 239: learn: 0.0010087 total: 579ms remaining: 1.83s 240: learn: 0.0010021 total: 581ms remaining: 1.83s 241: learn: 0.0010021 total: 582ms remaining: 1.82s 242: learn: 0.0009934 total: 584ms remaining: 1.82s 243: learn: 0.0009934 total: 586ms remaining: 1.81s 244: learn: 0.0009855 total: 587ms remaining: 1.81s 245: learn: 0.0009836 total: 589ms remaining: 1.8s 246: learn: 0.0009836 total: 591ms remaining: 1.8s 247: learn: 0.0009836 total: 592ms remaining: 1.8s 248: learn: 0.0009780 total: 594ms remaining: 1.79s 249: learn: 0.0009710 total: 596ms remaining: 1.79s 250: learn: 0.0009622 total: 598ms remaining: 1.78s 251: learn: 0.0009622 total: 600ms remaining: 1.78s 252: learn: 0.0009550 total: 602ms remaining: 1.78s 253: learn: 0.0009492 total: 603ms remaining: 1.77s 254: learn: 0.0009492 total: 605ms remaining: 1.77s 255: learn: 0.0009492 total: 607ms remaining: 1.76s 256: learn: 0.0009492 total: 608ms remaining: 1.76s 257: learn: 0.0009492 total: 610ms remaining: 1.75s 258: learn: 0.0009424 total: 612ms remaining: 1.75s 259: learn: 0.0009363 total: 614ms remaining: 1.75s 260: learn: 0.0009296 total: 616ms remaining: 1.74s 261: learn: 0.0009228 total: 618ms remaining: 1.74s 262: learn: 0.0009228 total: 619ms remaining: 1.74s 263: learn: 0.0009155 total: 621ms remaining: 1.73s 264: learn: 0.0009086 total: 623ms remaining: 1.73s 265: learn: 0.0009086 total: 625ms remaining: 1.72s 266: learn: 0.0009086 total: 626ms remaining: 1.72s 267: learn: 0.0009086 total: 628ms remaining: 1.71s 268: learn: 0.0009086 total: 629ms remaining: 1.71s 269: learn: 0.0009013 total: 631ms remaining: 1.71s 270: learn: 0.0008944 total: 633ms remaining: 1.7s 271: learn: 0.0008944 total: 635ms remaining: 1.7s 272: learn: 0.0008944 total: 636ms remaining: 1.69s 273: learn: 0.0008944 total: 638ms remaining: 1.69s 274: learn: 0.0008944 total: 639ms remaining: 1.69s 275: learn: 0.0008944 total: 641ms remaining: 1.68s 276: learn: 0.0008869 total: 643ms remaining: 1.68s 277: learn: 0.0008869 total: 644ms remaining: 1.67s 278: learn: 0.0008869 total: 646ms remaining: 1.67s 279: learn: 0.0008869 total: 647ms remaining: 1.66s 280: learn: 0.0008869 total: 649ms remaining: 1.66s 281: learn: 0.0008795 total: 651ms remaining: 1.66s 282: learn: 0.0008719 total: 653ms remaining: 1.65s 283: learn: 0.0008719 total: 654ms remaining: 1.65s 284: learn: 0.0008719 total: 656ms remaining: 1.65s 285: learn: 0.0008719 total: 658ms remaining: 1.64s 286: learn: 0.0008719 total: 659ms remaining: 1.64s 287: learn: 0.0008719 total: 661ms remaining: 1.63s 288: learn: 0.0008641 total: 662ms remaining: 1.63s 289: learn: 0.0008640 total: 664ms remaining: 1.63s 290: learn: 0.0008557 total: 665ms remaining: 1.62s 291: learn: 0.0008557 total: 667ms remaining: 1.62s 292: learn: 0.0008557 total: 668ms remaining: 1.61s 293: learn: 0.0008558 total: 670ms remaining: 1.61s 294: learn: 0.0008505 total: 672ms remaining: 1.6s 295: learn: 0.0008504 total: 673ms remaining: 1.6s 296: learn: 0.0008504 total: 675ms remaining: 1.6s 297: learn: 0.0008504 total: 676ms remaining: 1.59s 298: learn: 0.0008504 total: 678ms remaining: 1.59s 299: learn: 0.0008504 total: 680ms remaining: 1.58s 300: learn: 0.0008448 total: 681ms remaining: 1.58s 301: learn: 0.0008448 total: 683ms remaining: 1.58s 302: learn: 0.0008448 total: 685ms remaining: 1.57s 303: learn: 0.0008448 total: 686ms remaining: 1.57s 304: learn: 0.0008448 total: 688ms remaining: 1.57s 305: learn: 0.0008448 total: 689ms remaining: 1.56s 306: learn: 0.0008448 total: 691ms remaining: 1.56s 307: learn: 0.0008448 total: 693ms remaining: 1.56s 308: learn: 0.0008448 total: 694ms remaining: 1.55s 309: learn: 0.0008448 total: 696ms remaining: 1.55s 310: learn: 0.0008448 total: 697ms remaining: 1.54s 311: learn: 0.0008447 total: 699ms remaining: 1.54s 312: learn: 0.0008448 total: 700ms remaining: 1.54s 313: learn: 0.0008447 total: 702ms remaining: 1.53s 314: learn: 0.0008447 total: 704ms remaining: 1.53s 315: learn: 0.0008447 total: 705ms remaining: 1.53s 316: learn: 0.0008447 total: 706ms remaining: 1.52s 317: learn: 0.0008447 total: 708ms remaining: 1.52s 318: learn: 0.0008447 total: 709ms remaining: 1.51s 319: learn: 0.0008447 total: 711ms remaining: 1.51s 320: learn: 0.0008447 total: 713ms remaining: 1.51s 321: learn: 0.0008447 total: 714ms remaining: 1.5s 322: learn: 0.0008447 total: 716ms remaining: 1.5s 323: learn: 0.0008447 total: 717ms remaining: 1.5s 324: learn: 0.0008447 total: 719ms remaining: 1.49s 325: learn: 0.0008447 total: 720ms remaining: 1.49s 326: learn: 0.0008447 total: 722ms remaining: 1.49s 327: learn: 0.0008447 total: 723ms remaining: 1.48s 328: learn: 0.0008447 total: 725ms remaining: 1.48s 329: learn: 0.0008447 total: 727ms remaining: 1.48s 330: learn: 0.0008447 total: 728ms remaining: 1.47s 331: learn: 0.0008447 total: 730ms remaining: 1.47s 332: learn: 0.0008447 total: 731ms remaining: 1.46s 333: learn: 0.0008447 total: 733ms remaining: 1.46s 334: learn: 0.0008447 total: 735ms remaining: 1.46s 335: learn: 0.0008447 total: 736ms remaining: 1.45s 336: learn: 0.0008447 total: 738ms remaining: 1.45s 337: learn: 0.0008447 total: 739ms remaining: 1.45s 338: learn: 0.0008447 total: 741ms remaining: 1.44s 339: learn: 0.0008447 total: 742ms remaining: 1.44s 340: learn: 0.0008447 total: 744ms remaining: 1.44s 341: learn: 0.0008446 total: 745ms remaining: 1.43s 342: learn: 0.0008447 total: 747ms remaining: 1.43s 343: learn: 0.0008447 total: 749ms remaining: 1.43s 344: learn: 0.0008395 total: 751ms remaining: 1.43s 345: learn: 0.0008327 total: 753ms remaining: 1.42s 346: learn: 0.0008327 total: 754ms remaining: 1.42s 347: learn: 0.0008327 total: 756ms remaining: 1.42s 348: learn: 0.0008327 total: 757ms remaining: 1.41s 349: learn: 0.0008327 total: 759ms remaining: 1.41s 350: learn: 0.0008327 total: 760ms remaining: 1.41s 351: learn: 0.0008327 total: 762ms remaining: 1.4s 352: learn: 0.0008327 total: 763ms remaining: 1.4s 353: learn: 0.0008327 total: 765ms remaining: 1.4s 354: learn: 0.0008326 total: 766ms remaining: 1.39s 355: learn: 0.0008326 total: 768ms remaining: 1.39s 356: learn: 0.0008326 total: 770ms remaining: 1.39s 357: learn: 0.0008326 total: 771ms remaining: 1.38s 358: learn: 0.0008326 total: 773ms remaining: 1.38s 359: learn: 0.0008326 total: 774ms remaining: 1.38s 360: learn: 0.0008260 total: 776ms remaining: 1.37s 361: learn: 0.0008260 total: 778ms remaining: 1.37s 362: learn: 0.0008260 total: 779ms remaining: 1.37s 363: learn: 0.0008260 total: 781ms remaining: 1.36s 364: learn: 0.0008260 total: 783ms remaining: 1.36s 365: learn: 0.0008260 total: 784ms remaining: 1.36s 366: learn: 0.0008260 total: 786ms remaining: 1.35s 367: learn: 0.0008260 total: 787ms remaining: 1.35s 368: learn: 0.0008260 total: 789ms remaining: 1.35s 369: learn: 0.0008260 total: 790ms remaining: 1.34s 370: learn: 0.0008260 total: 792ms remaining: 1.34s 371: learn: 0.0008260 total: 794ms remaining: 1.34s 372: learn: 0.0008260 total: 795ms remaining: 1.34s 373: learn: 0.0008260 total: 797ms remaining: 1.33s 374: learn: 0.0008260 total: 799ms remaining: 1.33s 375: learn: 0.0008260 total: 800ms remaining: 1.33s 376: learn: 0.0008259 total: 802ms remaining: 1.32s 377: learn: 0.0008204 total: 804ms remaining: 1.32s 378: learn: 0.0008204 total: 805ms remaining: 1.32s 379: learn: 0.0008204 total: 807ms remaining: 1.32s 380: learn: 0.0008204 total: 809ms remaining: 1.31s 381: learn: 0.0008204 total: 810ms remaining: 1.31s 382: learn: 0.0008204 total: 812ms remaining: 1.31s 383: learn: 0.0008203 total: 814ms remaining: 1.3s 384: learn: 0.0008204 total: 815ms remaining: 1.3s 385: learn: 0.0008203 total: 817ms remaining: 1.3s 386: learn: 0.0008204 total: 819ms remaining: 1.3s 387: learn: 0.0008203 total: 821ms remaining: 1.29s 388: learn: 0.0008203 total: 822ms remaining: 1.29s 389: learn: 0.0008203 total: 824ms remaining: 1.29s 390: learn: 0.0008137 total: 826ms remaining: 1.29s 391: learn: 0.0008136 total: 828ms remaining: 1.28s 392: learn: 0.0008137 total: 829ms remaining: 1.28s 393: learn: 0.0008137 total: 831ms remaining: 1.28s 394: learn: 0.0008136 total: 833ms remaining: 1.27s 395: learn: 0.0008136 total: 834ms remaining: 1.27s 396: learn: 0.0008136 total: 836ms remaining: 1.27s 397: learn: 0.0008136 total: 838ms remaining: 1.27s 398: learn: 0.0008136 total: 839ms remaining: 1.26s 399: learn: 0.0008136 total: 841ms remaining: 1.26s 400: learn: 0.0008136 total: 843ms remaining: 1.26s 401: learn: 0.0008136 total: 844ms remaining: 1.25s 402: learn: 0.0008136 total: 846ms remaining: 1.25s 403: learn: 0.0008136 total: 848ms remaining: 1.25s 404: learn: 0.0008136 total: 849ms remaining: 1.25s 405: learn: 0.0008136 total: 851ms remaining: 1.24s 406: learn: 0.0008136 total: 853ms remaining: 1.24s 407: learn: 0.0008136 total: 854ms remaining: 1.24s 408: learn: 0.0008136 total: 856ms remaining: 1.24s 409: learn: 0.0008136 total: 858ms remaining: 1.23s 410: learn: 0.0008136 total: 859ms remaining: 1.23s 411: learn: 0.0008136 total: 861ms remaining: 1.23s 412: learn: 0.0008136 total: 863ms remaining: 1.23s 413: learn: 0.0008136 total: 864ms remaining: 1.22s 414: learn: 0.0008136 total: 866ms remaining: 1.22s 415: learn: 0.0008136 total: 868ms remaining: 1.22s 416: learn: 0.0008136 total: 869ms remaining: 1.22s 417: learn: 0.0008136 total: 871ms remaining: 1.21s 418: learn: 0.0008070 total: 873ms remaining: 1.21s 419: learn: 0.0008070 total: 875ms remaining: 1.21s 420: learn: 0.0008070 total: 876ms remaining: 1.21s 421: learn: 0.0008070 total: 878ms remaining: 1.2s 422: learn: 0.0008069 total: 880ms remaining: 1.2s 423: learn: 0.0008070 total: 881ms remaining: 1.2s 424: learn: 0.0008069 total: 883ms remaining: 1.19s 425: learn: 0.0008069 total: 885ms remaining: 1.19s 426: learn: 0.0008047 total: 887ms remaining: 1.19s 427: learn: 0.0008047 total: 888ms remaining: 1.19s 428: learn: 0.0008047 total: 890ms remaining: 1.18s 429: learn: 0.0008047 total: 892ms remaining: 1.18s 430: learn: 0.0007970 total: 894ms remaining: 1.18s 431: learn: 0.0007970 total: 895ms remaining: 1.18s 432: learn: 0.0007970 total: 897ms remaining: 1.17s 433: learn: 0.0007970 total: 899ms remaining: 1.17s 434: learn: 0.0007970 total: 900ms remaining: 1.17s 435: learn: 0.0007970 total: 902ms remaining: 1.17s 436: learn: 0.0007970 total: 904ms remaining: 1.16s 437: learn: 0.0007970 total: 906ms remaining: 1.16s 438: learn: 0.0007970 total: 907ms remaining: 1.16s 439: learn: 0.0007970 total: 909ms remaining: 1.16s 440: learn: 0.0007970 total: 911ms remaining: 1.15s 441: learn: 0.0007970 total: 913ms remaining: 1.15s 442: learn: 0.0007970 total: 915ms remaining: 1.15s 443: learn: 0.0007970 total: 917ms remaining: 1.15s 444: learn: 0.0007970 total: 919ms remaining: 1.15s 445: learn: 0.0007970 total: 920ms remaining: 1.14s 446: learn: 0.0007970 total: 922ms remaining: 1.14s 447: learn: 0.0007970 total: 924ms remaining: 1.14s 448: learn: 0.0007970 total: 925ms remaining: 1.14s 449: learn: 0.0007970 total: 927ms remaining: 1.13s 450: learn: 0.0007970 total: 929ms remaining: 1.13s 451: learn: 0.0007970 total: 931ms remaining: 1.13s 452: learn: 0.0007970 total: 933ms remaining: 1.13s 453: learn: 0.0007970 total: 934ms remaining: 1.12s 454: learn: 0.0007970 total: 936ms remaining: 1.12s 455: learn: 0.0007970 total: 938ms remaining: 1.12s 456: learn: 0.0007970 total: 940ms remaining: 1.12s 457: learn: 0.0007970 total: 942ms remaining: 1.11s 458: learn: 0.0007970 total: 944ms remaining: 1.11s 459: learn: 0.0007970 total: 946ms remaining: 1.11s 460: learn: 0.0007970 total: 948ms remaining: 1.11s 461: learn: 0.0007970 total: 950ms remaining: 1.1s 462: learn: 0.0007970 total: 951ms remaining: 1.1s 463: learn: 0.0007970 total: 953ms remaining: 1.1s 464: learn: 0.0007970 total: 955ms remaining: 1.1s 465: learn: 0.0007970 total: 957ms remaining: 1.1s 466: learn: 0.0007908 total: 959ms remaining: 1.09s 467: learn: 0.0007907 total: 961ms remaining: 1.09s 468: learn: 0.0007907 total: 963ms remaining: 1.09s 469: learn: 0.0007907 total: 965ms remaining: 1.09s 470: learn: 0.0007907 total: 966ms remaining: 1.08s 471: learn: 0.0007907 total: 968ms remaining: 1.08s 472: learn: 0.0007907 total: 970ms remaining: 1.08s 473: learn: 0.0007907 total: 972ms remaining: 1.08s 474: learn: 0.0007907 total: 975ms remaining: 1.08s 475: learn: 0.0007907 total: 976ms remaining: 1.07s 476: learn: 0.0007907 total: 979ms remaining: 1.07s 477: learn: 0.0007907 total: 981ms remaining: 1.07s 478: learn: 0.0007907 total: 982ms remaining: 1.07s 479: learn: 0.0007907 total: 984ms remaining: 1.07s 480: learn: 0.0007907 total: 986ms remaining: 1.06s 481: learn: 0.0007907 total: 988ms remaining: 1.06s 482: learn: 0.0007907 total: 991ms remaining: 1.06s 483: learn: 0.0007907 total: 992ms remaining: 1.06s 484: learn: 0.0007907 total: 995ms remaining: 1.06s 485: learn: 0.0007907 total: 997ms remaining: 1.05s 486: learn: 0.0007907 total: 998ms remaining: 1.05s 487: learn: 0.0007907 total: 1s remaining: 1.05s 488: learn: 0.0007907 total: 1s remaining: 1.05s 489: learn: 0.0007907 total: 1s remaining: 1.04s 490: learn: 0.0007907 total: 1s remaining: 1.04s 491: learn: 0.0007907 total: 1.01s remaining: 1.04s 492: learn: 0.0007907 total: 1.01s remaining: 1.04s 493: learn: 0.0007907 total: 1.01s remaining: 1.04s 494: learn: 0.0007907 total: 1.01s remaining: 1.03s 495: learn: 0.0007907 total: 1.01s remaining: 1.03s 496: learn: 0.0007907 total: 1.02s remaining: 1.03s 497: learn: 0.0007907 total: 1.02s remaining: 1.03s 498: learn: 0.0007907 total: 1.02s remaining: 1.02s 499: learn: 0.0007906 total: 1.02s remaining: 1.02s 500: learn: 0.0007906 total: 1.02s remaining: 1.02s 501: learn: 0.0007906 total: 1.02s remaining: 1.02s 502: learn: 0.0007906 total: 1.03s remaining: 1.01s 503: learn: 0.0007906 total: 1.03s remaining: 1.01s 504: learn: 0.0007906 total: 1.03s remaining: 1.01s 505: learn: 0.0007906 total: 1.03s remaining: 1.01s 506: learn: 0.0007906 total: 1.03s remaining: 1s 507: learn: 0.0007906 total: 1.03s remaining: 1s 508: learn: 0.0007906 total: 1.04s remaining: 1s 509: learn: 0.0007906 total: 1.04s remaining: 999ms 510: learn: 0.0007906 total: 1.04s remaining: 996ms 511: learn: 0.0007906 total: 1.04s remaining: 994ms 512: learn: 0.0007906 total: 1.04s remaining: 992ms 513: learn: 0.0007906 total: 1.05s remaining: 989ms 514: learn: 0.0007906 total: 1.05s remaining: 987ms 515: learn: 0.0007906 total: 1.05s remaining: 985ms 516: learn: 0.0007906 total: 1.05s remaining: 982ms 517: learn: 0.0007906 total: 1.05s remaining: 980ms 518: learn: 0.0007906 total: 1.05s remaining: 978ms 519: learn: 0.0007906 total: 1.06s remaining: 975ms 520: learn: 0.0007906 total: 1.06s remaining: 973ms 521: learn: 0.0007906 total: 1.06s remaining: 970ms 522: learn: 0.0007906 total: 1.06s remaining: 968ms 523: learn: 0.0007906 total: 1.06s remaining: 966ms 524: learn: 0.0007906 total: 1.06s remaining: 963ms 525: learn: 0.0007906 total: 1.07s remaining: 961ms 526: learn: 0.0007906 total: 1.07s remaining: 959ms 527: learn: 0.0007906 total: 1.07s remaining: 956ms 528: learn: 0.0007905 total: 1.07s remaining: 954ms 529: learn: 0.0007906 total: 1.07s remaining: 952ms 530: learn: 0.0007905 total: 1.07s remaining: 949ms 531: learn: 0.0007905 total: 1.08s remaining: 947ms 532: learn: 0.0007905 total: 1.08s remaining: 945ms 533: learn: 0.0007905 total: 1.08s remaining: 942ms 534: learn: 0.0007905 total: 1.08s remaining: 940ms 535: learn: 0.0007905 total: 1.08s remaining: 938ms 536: learn: 0.0007905 total: 1.08s remaining: 935ms 537: learn: 0.0007905 total: 1.09s remaining: 933ms 538: learn: 0.0007905 total: 1.09s remaining: 931ms 539: learn: 0.0007905 total: 1.09s remaining: 928ms 540: learn: 0.0007905 total: 1.09s remaining: 926ms 541: learn: 0.0007905 total: 1.09s remaining: 924ms 542: learn: 0.0007905 total: 1.09s remaining: 921ms 543: learn: 0.0007905 total: 1.1s remaining: 919ms 544: learn: 0.0007905 total: 1.1s remaining: 917ms 545: learn: 0.0007905 total: 1.1s remaining: 915ms 546: learn: 0.0007905 total: 1.1s remaining: 912ms 547: learn: 0.0007905 total: 1.1s remaining: 910ms 548: learn: 0.0007905 total: 1.1s remaining: 908ms 549: learn: 0.0007905 total: 1.11s remaining: 905ms 550: learn: 0.0007905 total: 1.11s remaining: 903ms 551: learn: 0.0007905 total: 1.11s remaining: 901ms 552: learn: 0.0007905 total: 1.11s remaining: 899ms 553: learn: 0.0007905 total: 1.11s remaining: 896ms 554: learn: 0.0007905 total: 1.11s remaining: 894ms 555: learn: 0.0007905 total: 1.12s remaining: 892ms 556: learn: 0.0007905 total: 1.12s remaining: 890ms 557: learn: 0.0007905 total: 1.12s remaining: 887ms 558: learn: 0.0007905 total: 1.12s remaining: 885ms 559: learn: 0.0007904 total: 1.12s remaining: 883ms 560: learn: 0.0007904 total: 1.13s remaining: 881ms 561: learn: 0.0007904 total: 1.13s remaining: 878ms 562: learn: 0.0007904 total: 1.13s remaining: 876ms 563: learn: 0.0007904 total: 1.13s remaining: 874ms 564: learn: 0.0007904 total: 1.13s remaining: 871ms 565: learn: 0.0007904 total: 1.13s remaining: 869ms 566: learn: 0.0007904 total: 1.14s remaining: 867ms 567: learn: 0.0007904 total: 1.14s remaining: 865ms 568: learn: 0.0007904 total: 1.14s remaining: 862ms 569: learn: 0.0007904 total: 1.14s remaining: 860ms 570: learn: 0.0007904 total: 1.14s remaining: 858ms 571: learn: 0.0007904 total: 1.14s remaining: 856ms 572: learn: 0.0007904 total: 1.15s remaining: 854ms 573: learn: 0.0007904 total: 1.15s remaining: 852ms 574: learn: 0.0007904 total: 1.15s remaining: 849ms 575: learn: 0.0007904 total: 1.15s remaining: 847ms 576: learn: 0.0007904 total: 1.15s remaining: 845ms 577: learn: 0.0007904 total: 1.15s remaining: 843ms 578: learn: 0.0007904 total: 1.16s remaining: 841ms 579: learn: 0.0007904 total: 1.16s remaining: 839ms 580: learn: 0.0007904 total: 1.16s remaining: 836ms 581: learn: 0.0007904 total: 1.16s remaining: 834ms 582: learn: 0.0007904 total: 1.16s remaining: 832ms 583: learn: 0.0007904 total: 1.16s remaining: 830ms 584: learn: 0.0007904 total: 1.17s remaining: 828ms 585: learn: 0.0007904 total: 1.17s remaining: 825ms 586: learn: 0.0007904 total: 1.17s remaining: 823ms 587: learn: 0.0007904 total: 1.17s remaining: 821ms 588: learn: 0.0007904 total: 1.17s remaining: 819ms 589: learn: 0.0007904 total: 1.18s remaining: 817ms 590: learn: 0.0007903 total: 1.18s remaining: 814ms 591: learn: 0.0007904 total: 1.18s remaining: 812ms 592: learn: 0.0007904 total: 1.18s remaining: 810ms 593: learn: 0.0007903 total: 1.18s remaining: 808ms 594: learn: 0.0007903 total: 1.18s remaining: 806ms 595: learn: 0.0007903 total: 1.19s remaining: 804ms 596: learn: 0.0007903 total: 1.19s remaining: 801ms 597: learn: 0.0007903 total: 1.19s remaining: 799ms 598: learn: 0.0007903 total: 1.19s remaining: 797ms 599: learn: 0.0007903 total: 1.19s remaining: 795ms 600: learn: 0.0007903 total: 1.19s remaining: 793ms 601: learn: 0.0007903 total: 1.2s remaining: 791ms 602: learn: 0.0007903 total: 1.2s remaining: 789ms 603: learn: 0.0007903 total: 1.2s remaining: 787ms 604: learn: 0.0007903 total: 1.2s remaining: 784ms 605: learn: 0.0007903 total: 1.2s remaining: 782ms 606: learn: 0.0007903 total: 1.2s remaining: 780ms 607: learn: 0.0007903 total: 1.21s remaining: 778ms 608: learn: 0.0007903 total: 1.21s remaining: 776ms 609: learn: 0.0007903 total: 1.21s remaining: 773ms 610: learn: 0.0007903 total: 1.21s remaining: 771ms 611: learn: 0.0007903 total: 1.21s remaining: 769ms 612: learn: 0.0007903 total: 1.21s remaining: 767ms 613: learn: 0.0007903 total: 1.22s remaining: 765ms 614: learn: 0.0007903 total: 1.22s remaining: 763ms 615: learn: 0.0007903 total: 1.22s remaining: 761ms 616: learn: 0.0007903 total: 1.22s remaining: 759ms 617: learn: 0.0007903 total: 1.22s remaining: 756ms 618: learn: 0.0007903 total: 1.23s remaining: 754ms 619: learn: 0.0007903 total: 1.23s remaining: 752ms 620: learn: 0.0007903 total: 1.23s remaining: 752ms 621: learn: 0.0007903 total: 1.24s remaining: 751ms 622: learn: 0.0007903 total: 1.24s remaining: 751ms 623: learn: 0.0007903 total: 1.24s remaining: 750ms 624: learn: 0.0007902 total: 1.25s remaining: 748ms 625: learn: 0.0007902 total: 1.25s remaining: 746ms 626: learn: 0.0007902 total: 1.25s remaining: 745ms 627: learn: 0.0007902 total: 1.25s remaining: 743ms 628: learn: 0.0007902 total: 1.26s remaining: 741ms 629: learn: 0.0007902 total: 1.26s remaining: 739ms 630: learn: 0.0007902 total: 1.26s remaining: 737ms 631: learn: 0.0007902 total: 1.26s remaining: 735ms 632: learn: 0.0007902 total: 1.26s remaining: 733ms 633: learn: 0.0007902 total: 1.27s remaining: 731ms 634: learn: 0.0007902 total: 1.27s remaining: 729ms 635: learn: 0.0007902 total: 1.27s remaining: 727ms 636: learn: 0.0007902 total: 1.27s remaining: 725ms 637: learn: 0.0007902 total: 1.27s remaining: 723ms 638: learn: 0.0007902 total: 1.27s remaining: 721ms 639: learn: 0.0007902 total: 1.28s remaining: 718ms 640: learn: 0.0007902 total: 1.28s remaining: 716ms 641: learn: 0.0007902 total: 1.28s remaining: 714ms 642: learn: 0.0007902 total: 1.28s remaining: 712ms 643: learn: 0.0007902 total: 1.28s remaining: 710ms 644: learn: 0.0007902 total: 1.28s remaining: 708ms 645: learn: 0.0007902 total: 1.29s remaining: 706ms 646: learn: 0.0007902 total: 1.29s remaining: 704ms 647: learn: 0.0007902 total: 1.29s remaining: 701ms 648: learn: 0.0007902 total: 1.29s remaining: 699ms 649: learn: 0.0007902 total: 1.29s remaining: 697ms 650: learn: 0.0007902 total: 1.3s remaining: 695ms 651: learn: 0.0007902 total: 1.3s remaining: 693ms 652: learn: 0.0007901 total: 1.3s remaining: 691ms 653: learn: 0.0007901 total: 1.3s remaining: 688ms 654: learn: 0.0007902 total: 1.3s remaining: 686ms 655: learn: 0.0007901 total: 1.3s remaining: 684ms 656: learn: 0.0007901 total: 1.31s remaining: 682ms 657: learn: 0.0007901 total: 1.31s remaining: 680ms 658: learn: 0.0007901 total: 1.31s remaining: 678ms 659: learn: 0.0007901 total: 1.31s remaining: 676ms 660: learn: 0.0007901 total: 1.31s remaining: 674ms 661: learn: 0.0007901 total: 1.31s remaining: 671ms 662: learn: 0.0007901 total: 1.32s remaining: 669ms 663: learn: 0.0007901 total: 1.32s remaining: 667ms 664: learn: 0.0007901 total: 1.32s remaining: 665ms 665: learn: 0.0007901 total: 1.32s remaining: 663ms 666: learn: 0.0007901 total: 1.32s remaining: 661ms 667: learn: 0.0007901 total: 1.32s remaining: 659ms 668: learn: 0.0007901 total: 1.33s remaining: 657ms 669: learn: 0.0007901 total: 1.33s remaining: 654ms 670: learn: 0.0007901 total: 1.33s remaining: 652ms 671: learn: 0.0007901 total: 1.33s remaining: 650ms 672: learn: 0.0007901 total: 1.33s remaining: 648ms 673: learn: 0.0007901 total: 1.33s remaining: 646ms 674: learn: 0.0007901 total: 1.34s remaining: 644ms 675: learn: 0.0007901 total: 1.34s remaining: 642ms 676: learn: 0.0007901 total: 1.34s remaining: 640ms 677: learn: 0.0007901 total: 1.34s remaining: 638ms 678: learn: 0.0007901 total: 1.34s remaining: 635ms 679: learn: 0.0007901 total: 1.34s remaining: 633ms 680: learn: 0.0007901 total: 1.35s remaining: 631ms 681: learn: 0.0007901 total: 1.35s remaining: 629ms 682: learn: 0.0007901 total: 1.35s remaining: 627ms 683: learn: 0.0007901 total: 1.35s remaining: 625ms 684: learn: 0.0007900 total: 1.35s remaining: 623ms 685: learn: 0.0007901 total: 1.36s remaining: 621ms 686: learn: 0.0007900 total: 1.36s remaining: 619ms 687: learn: 0.0007900 total: 1.36s remaining: 617ms 688: learn: 0.0007900 total: 1.36s remaining: 615ms 689: learn: 0.0007900 total: 1.36s remaining: 613ms 690: learn: 0.0007900 total: 1.36s remaining: 610ms 691: learn: 0.0007900 total: 1.37s remaining: 608ms 692: learn: 0.0007900 total: 1.37s remaining: 606ms 693: learn: 0.0007900 total: 1.37s remaining: 604ms 694: learn: 0.0007900 total: 1.37s remaining: 602ms 695: learn: 0.0007900 total: 1.37s remaining: 600ms 696: learn: 0.0007900 total: 1.37s remaining: 598ms 697: learn: 0.0007900 total: 1.38s remaining: 596ms 698: learn: 0.0007900 total: 1.38s remaining: 593ms 699: learn: 0.0007900 total: 1.38s remaining: 591ms 700: learn: 0.0007900 total: 1.38s remaining: 589ms 701: learn: 0.0007900 total: 1.38s remaining: 587ms 702: learn: 0.0007900 total: 1.38s remaining: 585ms 703: learn: 0.0007900 total: 1.39s remaining: 583ms 704: learn: 0.0007900 total: 1.39s remaining: 581ms 705: learn: 0.0007900 total: 1.39s remaining: 579ms 706: learn: 0.0007900 total: 1.39s remaining: 577ms 707: learn: 0.0007900 total: 1.39s remaining: 574ms 708: learn: 0.0007900 total: 1.39s remaining: 572ms 709: learn: 0.0007900 total: 1.4s remaining: 570ms 710: learn: 0.0007899 total: 1.4s remaining: 568ms 711: learn: 0.0007899 total: 1.4s remaining: 566ms 712: learn: 0.0007900 total: 1.4s remaining: 564ms 713: learn: 0.0007899 total: 1.4s remaining: 562ms 714: learn: 0.0007899 total: 1.4s remaining: 560ms 715: learn: 0.0007899 total: 1.41s remaining: 558ms 716: learn: 0.0007899 total: 1.41s remaining: 556ms 717: learn: 0.0007899 total: 1.41s remaining: 554ms 718: learn: 0.0007899 total: 1.41s remaining: 552ms 719: learn: 0.0007899 total: 1.41s remaining: 550ms 720: learn: 0.0007899 total: 1.42s remaining: 548ms 721: learn: 0.0007899 total: 1.42s remaining: 545ms 722: learn: 0.0007899 total: 1.42s remaining: 543ms 723: learn: 0.0007899 total: 1.42s remaining: 541ms 724: learn: 0.0007899 total: 1.42s remaining: 539ms 725: learn: 0.0007899 total: 1.42s remaining: 537ms 726: learn: 0.0007899 total: 1.43s remaining: 535ms 727: learn: 0.0007899 total: 1.43s remaining: 533ms 728: learn: 0.0007899 total: 1.43s remaining: 531ms 729: learn: 0.0007899 total: 1.43s remaining: 529ms 730: learn: 0.0007899 total: 1.43s remaining: 527ms 731: learn: 0.0007899 total: 1.43s remaining: 525ms 732: learn: 0.0007899 total: 1.44s remaining: 523ms 733: learn: 0.0007899 total: 1.44s remaining: 521ms 734: learn: 0.0007899 total: 1.44s remaining: 519ms 735: learn: 0.0007899 total: 1.44s remaining: 517ms 736: learn: 0.0007898 total: 1.44s remaining: 515ms 737: learn: 0.0007899 total: 1.44s remaining: 513ms 738: learn: 0.0007899 total: 1.45s remaining: 511ms 739: learn: 0.0007898 total: 1.45s remaining: 509ms 740: learn: 0.0007898 total: 1.45s remaining: 507ms 741: learn: 0.0007899 total: 1.45s remaining: 505ms 742: learn: 0.0007899 total: 1.45s remaining: 502ms 743: learn: 0.0007898 total: 1.45s remaining: 500ms 744: learn: 0.0007898 total: 1.46s remaining: 498ms 745: learn: 0.0007898 total: 1.46s remaining: 496ms 746: learn: 0.0007898 total: 1.46s remaining: 494ms 747: learn: 0.0007898 total: 1.46s remaining: 492ms 748: learn: 0.0007898 total: 1.46s remaining: 490ms 749: learn: 0.0007898 total: 1.46s remaining: 488ms 750: learn: 0.0007898 total: 1.47s remaining: 486ms 751: learn: 0.0007898 total: 1.47s remaining: 484ms 752: learn: 0.0007898 total: 1.47s remaining: 482ms 753: learn: 0.0007898 total: 1.47s remaining: 480ms 754: learn: 0.0007898 total: 1.47s remaining: 478ms 755: learn: 0.0007898 total: 1.47s remaining: 476ms 756: learn: 0.0007898 total: 1.48s remaining: 474ms 757: learn: 0.0007898 total: 1.48s remaining: 472ms 758: learn: 0.0007898 total: 1.48s remaining: 470ms 759: learn: 0.0007898 total: 1.48s remaining: 468ms 760: learn: 0.0007897 total: 1.48s remaining: 466ms 761: learn: 0.0007897 total: 1.48s remaining: 463ms 762: learn: 0.0007897 total: 1.49s remaining: 461ms 763: learn: 0.0007897 total: 1.49s remaining: 459ms 764: learn: 0.0007897 total: 1.49s remaining: 457ms 765: learn: 0.0007897 total: 1.49s remaining: 455ms 766: learn: 0.0007897 total: 1.49s remaining: 453ms 767: learn: 0.0007897 total: 1.49s remaining: 451ms 768: learn: 0.0007897 total: 1.5s remaining: 449ms 769: learn: 0.0007897 total: 1.5s remaining: 447ms 770: learn: 0.0007897 total: 1.5s remaining: 445ms 771: learn: 0.0007897 total: 1.5s remaining: 443ms 772: learn: 0.0007897 total: 1.5s remaining: 441ms 773: learn: 0.0007897 total: 1.5s remaining: 439ms 774: learn: 0.0007897 total: 1.51s remaining: 437ms 775: learn: 0.0007897 total: 1.51s remaining: 435ms 776: learn: 0.0007897 total: 1.51s remaining: 433ms 777: learn: 0.0007897 total: 1.51s remaining: 431ms 778: learn: 0.0007897 total: 1.51s remaining: 429ms 779: learn: 0.0007897 total: 1.51s remaining: 427ms 780: learn: 0.0007897 total: 1.52s remaining: 425ms 781: learn: 0.0007897 total: 1.52s remaining: 423ms 782: learn: 0.0007897 total: 1.52s remaining: 421ms 783: learn: 0.0007897 total: 1.52s remaining: 419ms 784: learn: 0.0007897 total: 1.52s remaining: 417ms 785: learn: 0.0007896 total: 1.52s remaining: 415ms 786: learn: 0.0007896 total: 1.53s remaining: 413ms 787: learn: 0.0007896 total: 1.53s remaining: 411ms 788: learn: 0.0007896 total: 1.53s remaining: 409ms 789: learn: 0.0007896 total: 1.53s remaining: 407ms 790: learn: 0.0007896 total: 1.53s remaining: 405ms 791: learn: 0.0007896 total: 1.53s remaining: 403ms 792: learn: 0.0007896 total: 1.53s remaining: 401ms 793: learn: 0.0007896 total: 1.54s remaining: 399ms 794: learn: 0.0007896 total: 1.54s remaining: 397ms 795: learn: 0.0007896 total: 1.54s remaining: 395ms 796: learn: 0.0007896 total: 1.54s remaining: 393ms 797: learn: 0.0007896 total: 1.54s remaining: 391ms 798: learn: 0.0007896 total: 1.54s remaining: 389ms 799: learn: 0.0007896 total: 1.55s remaining: 387ms 800: learn: 0.0007896 total: 1.55s remaining: 385ms 801: learn: 0.0007896 total: 1.55s remaining: 383ms 802: learn: 0.0007896 total: 1.55s remaining: 381ms 803: learn: 0.0007896 total: 1.55s remaining: 379ms 804: learn: 0.0007896 total: 1.55s remaining: 377ms 805: learn: 0.0007896 total: 1.56s remaining: 375ms 806: learn: 0.0007896 total: 1.56s remaining: 373ms 807: learn: 0.0007895 total: 1.56s remaining: 371ms 808: learn: 0.0007895 total: 1.56s remaining: 369ms 809: learn: 0.0007895 total: 1.56s remaining: 367ms 810: learn: 0.0007895 total: 1.56s remaining: 365ms 811: learn: 0.0007896 total: 1.57s remaining: 363ms 812: learn: 0.0007895 total: 1.57s remaining: 361ms 813: learn: 0.0007895 total: 1.57s remaining: 359ms 814: learn: 0.0007895 total: 1.57s remaining: 357ms 815: learn: 0.0007895 total: 1.57s remaining: 355ms 816: learn: 0.0007895 total: 1.57s remaining: 353ms 817: learn: 0.0007895 total: 1.58s remaining: 351ms 818: learn: 0.0007895 total: 1.58s remaining: 349ms 819: learn: 0.0007895 total: 1.58s remaining: 347ms 820: learn: 0.0007895 total: 1.58s remaining: 345ms 821: learn: 0.0007895 total: 1.58s remaining: 343ms 822: learn: 0.0007895 total: 1.58s remaining: 341ms 823: learn: 0.0007895 total: 1.59s remaining: 339ms 824: learn: 0.0007895 total: 1.59s remaining: 337ms 825: learn: 0.0007895 total: 1.59s remaining: 335ms 826: learn: 0.0007895 total: 1.59s remaining: 333ms 827: learn: 0.0007895 total: 1.59s remaining: 331ms 828: learn: 0.0007895 total: 1.6s remaining: 329ms 829: learn: 0.0007895 total: 1.6s remaining: 327ms 830: learn: 0.0007895 total: 1.6s remaining: 325ms 831: learn: 0.0007894 total: 1.6s remaining: 323ms 832: learn: 0.0007894 total: 1.6s remaining: 321ms 833: learn: 0.0007894 total: 1.6s remaining: 319ms 834: learn: 0.0007894 total: 1.61s remaining: 317ms 835: learn: 0.0007894 total: 1.61s remaining: 315ms 836: learn: 0.0007894 total: 1.61s remaining: 313ms 837: learn: 0.0007894 total: 1.61s remaining: 312ms 838: learn: 0.0007894 total: 1.61s remaining: 310ms 839: learn: 0.0007894 total: 1.61s remaining: 308ms 840: learn: 0.0007894 total: 1.62s remaining: 306ms 841: learn: 0.0007894 total: 1.62s remaining: 304ms 842: learn: 0.0007894 total: 1.62s remaining: 302ms 843: learn: 0.0007894 total: 1.62s remaining: 300ms 844: learn: 0.0007894 total: 1.62s remaining: 298ms 845: learn: 0.0007894 total: 1.63s remaining: 296ms 846: learn: 0.0007894 total: 1.63s remaining: 294ms 847: learn: 0.0007894 total: 1.63s remaining: 292ms 848: learn: 0.0007894 total: 1.63s remaining: 290ms 849: learn: 0.0007894 total: 1.63s remaining: 288ms 850: learn: 0.0007894 total: 1.63s remaining: 286ms 851: learn: 0.0007894 total: 1.64s remaining: 284ms 852: learn: 0.0007894 total: 1.64s remaining: 282ms 853: learn: 0.0007894 total: 1.64s remaining: 280ms 854: learn: 0.0007894 total: 1.64s remaining: 278ms 855: learn: 0.0007894 total: 1.64s remaining: 276ms 856: learn: 0.0007893 total: 1.64s remaining: 274ms 857: learn: 0.0007893 total: 1.65s remaining: 272ms 858: learn: 0.0007893 total: 1.65s remaining: 270ms 859: learn: 0.0007893 total: 1.65s remaining: 268ms 860: learn: 0.0007893 total: 1.65s remaining: 266ms 861: learn: 0.0007893 total: 1.65s remaining: 265ms 862: learn: 0.0007893 total: 1.65s remaining: 263ms 863: learn: 0.0007893 total: 1.66s remaining: 261ms 864: learn: 0.0007893 total: 1.66s remaining: 259ms 865: learn: 0.0007893 total: 1.66s remaining: 257ms 866: learn: 0.0007893 total: 1.66s remaining: 255ms 867: learn: 0.0007893 total: 1.66s remaining: 253ms 868: learn: 0.0007893 total: 1.66s remaining: 251ms 869: learn: 0.0007893 total: 1.67s remaining: 249ms 870: learn: 0.0007893 total: 1.67s remaining: 247ms 871: learn: 0.0007893 total: 1.67s remaining: 245ms 872: learn: 0.0007892 total: 1.67s remaining: 243ms 873: learn: 0.0007893 total: 1.67s remaining: 241ms 874: learn: 0.0007892 total: 1.67s remaining: 239ms 875: learn: 0.0007892 total: 1.68s remaining: 237ms 876: learn: 0.0007892 total: 1.68s remaining: 235ms 877: learn: 0.0007892 total: 1.68s remaining: 233ms 878: learn: 0.0007892 total: 1.68s remaining: 231ms 879: learn: 0.0007892 total: 1.68s remaining: 229ms 880: learn: 0.0007892 total: 1.68s remaining: 228ms 881: learn: 0.0007892 total: 1.69s remaining: 226ms 882: learn: 0.0007892 total: 1.69s remaining: 224ms 883: learn: 0.0007892 total: 1.69s remaining: 222ms 884: learn: 0.0007892 total: 1.69s remaining: 220ms 885: learn: 0.0007892 total: 1.69s remaining: 218ms 886: learn: 0.0007892 total: 1.69s remaining: 216ms 887: learn: 0.0007892 total: 1.7s remaining: 214ms 888: learn: 0.0007892 total: 1.7s remaining: 212ms 889: learn: 0.0007892 total: 1.7s remaining: 210ms 890: learn: 0.0007892 total: 1.7s remaining: 208ms 891: learn: 0.0007892 total: 1.7s remaining: 206ms 892: learn: 0.0007892 total: 1.7s remaining: 204ms 893: learn: 0.0007892 total: 1.71s remaining: 202ms 894: learn: 0.0007892 total: 1.71s remaining: 200ms 895: learn: 0.0007892 total: 1.71s remaining: 198ms 896: learn: 0.0007892 total: 1.71s remaining: 197ms 897: learn: 0.0007892 total: 1.71s remaining: 195ms 898: learn: 0.0007891 total: 1.71s remaining: 193ms 899: learn: 0.0007892 total: 1.72s remaining: 191ms 900: learn: 0.0007892 total: 1.72s remaining: 189ms 901: learn: 0.0007891 total: 1.72s remaining: 187ms 902: learn: 0.0007891 total: 1.72s remaining: 185ms 903: learn: 0.0007891 total: 1.72s remaining: 183ms 904: learn: 0.0007891 total: 1.72s remaining: 181ms 905: learn: 0.0007891 total: 1.73s remaining: 179ms 906: learn: 0.0007891 total: 1.73s remaining: 177ms 907: learn: 0.0007891 total: 1.73s remaining: 175ms 908: learn: 0.0007891 total: 1.73s remaining: 173ms 909: learn: 0.0007891 total: 1.73s remaining: 171ms 910: learn: 0.0007891 total: 1.73s remaining: 169ms 911: learn: 0.0007891 total: 1.74s remaining: 168ms 912: learn: 0.0007891 total: 1.74s remaining: 166ms 913: learn: 0.0007891 total: 1.74s remaining: 164ms 914: learn: 0.0007891 total: 1.74s remaining: 162ms 915: learn: 0.0007891 total: 1.74s remaining: 160ms 916: learn: 0.0007891 total: 1.74s remaining: 158ms 917: learn: 0.0007891 total: 1.75s remaining: 156ms 918: learn: 0.0007891 total: 1.75s remaining: 154ms 919: learn: 0.0007891 total: 1.75s remaining: 152ms 920: learn: 0.0007891 total: 1.75s remaining: 150ms 921: learn: 0.0007891 total: 1.75s remaining: 148ms 922: learn: 0.0007891 total: 1.75s remaining: 146ms 923: learn: 0.0007891 total: 1.76s remaining: 144ms 924: learn: 0.0007891 total: 1.76s remaining: 143ms 925: learn: 0.0007891 total: 1.76s remaining: 141ms 926: learn: 0.0007891 total: 1.76s remaining: 139ms 927: learn: 0.0007891 total: 1.76s remaining: 137ms 928: learn: 0.0007891 total: 1.76s remaining: 135ms 929: learn: 0.0007891 total: 1.77s remaining: 133ms 930: learn: 0.0007891 total: 1.77s remaining: 131ms 931: learn: 0.0007891 total: 1.77s remaining: 129ms 932: learn: 0.0007890 total: 1.77s remaining: 127ms 933: learn: 0.0007890 total: 1.77s remaining: 125ms 934: learn: 0.0007890 total: 1.77s remaining: 123ms 935: learn: 0.0007890 total: 1.78s remaining: 121ms 936: learn: 0.0007890 total: 1.78s remaining: 120ms 937: learn: 0.0007890 total: 1.78s remaining: 118ms 938: learn: 0.0007890 total: 1.78s remaining: 116ms 939: learn: 0.0007890 total: 1.78s remaining: 114ms 940: learn: 0.0007890 total: 1.78s remaining: 112ms 941: learn: 0.0007890 total: 1.79s remaining: 110ms 942: learn: 0.0007890 total: 1.79s remaining: 108ms 943: learn: 0.0007890 total: 1.79s remaining: 106ms 944: learn: 0.0007890 total: 1.79s remaining: 104ms 945: learn: 0.0007890 total: 1.79s remaining: 102ms 946: learn: 0.0007890 total: 1.79s remaining: 100ms 947: learn: 0.0007890 total: 1.8s remaining: 98.6ms 948: learn: 0.0007890 total: 1.8s remaining: 96.7ms 949: learn: 0.0007890 total: 1.8s remaining: 94.8ms 950: learn: 0.0007890 total: 1.8s remaining: 92.9ms 951: learn: 0.0007890 total: 1.8s remaining: 91ms 952: learn: 0.0007890 total: 1.8s remaining: 89.1ms 953: learn: 0.0007890 total: 1.81s remaining: 87.1ms 954: learn: 0.0007890 total: 1.81s remaining: 85.3ms 955: learn: 0.0007890 total: 1.81s remaining: 83.3ms 956: learn: 0.0007890 total: 1.81s remaining: 81.4ms 957: learn: 0.0007889 total: 1.81s remaining: 79.5ms 958: learn: 0.0007890 total: 1.82s remaining: 77.6ms 959: learn: 0.0007890 total: 1.82s remaining: 75.7ms 960: learn: 0.0007890 total: 1.82s remaining: 73.8ms 961: learn: 0.0007889 total: 1.82s remaining: 71.9ms 962: learn: 0.0007890 total: 1.82s remaining: 70ms 963: learn: 0.0007890 total: 1.82s remaining: 68.1ms 964: learn: 0.0007889 total: 1.83s remaining: 66.3ms 965: learn: 0.0007889 total: 1.83s remaining: 64.4ms 966: learn: 0.0007889 total: 1.83s remaining: 62.5ms 967: learn: 0.0007889 total: 1.83s remaining: 60.6ms 968: learn: 0.0007889 total: 1.83s remaining: 58.7ms 969: learn: 0.0007889 total: 1.83s remaining: 56.8ms 970: learn: 0.0007889 total: 1.84s remaining: 54.9ms 971: learn: 0.0007889 total: 1.84s remaining: 53ms 972: learn: 0.0007889 total: 1.84s remaining: 51.1ms 973: learn: 0.0007889 total: 1.84s remaining: 49.2ms 974: learn: 0.0007889 total: 1.84s remaining: 47.3ms 975: learn: 0.0007889 total: 1.84s remaining: 45.4ms 976: learn: 0.0007889 total: 1.85s remaining: 43.5ms 977: learn: 0.0007889 total: 1.85s remaining: 41.6ms 978: learn: 0.0007889 total: 1.85s remaining: 39.7ms 979: learn: 0.0007889 total: 1.85s remaining: 37.8ms 980: learn: 0.0007889 total: 1.85s remaining: 35.9ms 981: learn: 0.0007889 total: 1.85s remaining: 34ms 982: learn: 0.0007889 total: 1.86s remaining: 32.1ms 983: learn: 0.0007889 total: 1.86s remaining: 30.2ms 984: learn: 0.0007889 total: 1.86s remaining: 28.3ms 985: learn: 0.0007889 total: 1.86s remaining: 26.4ms 986: learn: 0.0007889 total: 1.86s remaining: 24.5ms 987: learn: 0.0007889 total: 1.86s remaining: 22.7ms 988: learn: 0.0007889 total: 1.87s remaining: 20.8ms 989: learn: 0.0007889 total: 1.87s remaining: 18.9ms 990: learn: 0.0007889 total: 1.87s remaining: 17ms 991: learn: 0.0007889 total: 1.87s remaining: 15.1ms 992: learn: 0.0007888 total: 1.87s remaining: 13.2ms 993: learn: 0.0007889 total: 1.88s remaining: 11.3ms 994: learn: 0.0007889 total: 1.88s remaining: 9.43ms 995: learn: 0.0007888 total: 1.88s remaining: 7.54ms 996: learn: 0.0007888 total: 1.88s remaining: 5.66ms 997: learn: 0.0007888 total: 1.88s remaining: 3.77ms 998: learn: 0.0007888 total: 1.88s remaining: 1.89ms 999: learn: 0.0007888 total: 1.89s remaining: 0us
y_pred = cat_model.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
catb_params = {
'iterations': [200,500],
'learning_rate': [0.01,0.05, 0.1],
'depth': [3,5,8] }
catb = CatBoostClassifier()
catb_cv_model = GridSearchCV(catb, catb_params, cv=5, n_jobs = -1, verbose = 2)
catb_cv_model.fit(X_train, y_train)
catb_cv_model.best_params_
Fitting 5 folds for each of 18 candidates, totalling 90 fits 0: learn: 0.6595468 total: 1.28ms remaining: 256ms 1: learn: 0.6259209 total: 2.59ms remaining: 257ms 2: learn: 0.5941751 total: 3.8ms remaining: 250ms 3: learn: 0.5632290 total: 4.91ms remaining: 241ms 4: learn: 0.5366301 total: 6.11ms remaining: 238ms 5: learn: 0.5097550 total: 7.29ms remaining: 236ms 6: learn: 0.4841281 total: 8.51ms remaining: 235ms 7: learn: 0.4569797 total: 9.53ms remaining: 229ms 8: learn: 0.4344650 total: 10.7ms remaining: 227ms 9: learn: 0.4130139 total: 12ms remaining: 227ms 10: learn: 0.3899785 total: 13.1ms remaining: 226ms 11: learn: 0.3708218 total: 14.3ms remaining: 225ms 12: learn: 0.3524314 total: 15.5ms remaining: 223ms 13: learn: 0.3349809 total: 16.7ms remaining: 222ms 14: learn: 0.3169636 total: 17.9ms remaining: 221ms 15: learn: 0.3002906 total: 19ms remaining: 219ms 16: learn: 0.2854770 total: 20.2ms remaining: 217ms 17: learn: 0.2713017 total: 21.4ms remaining: 216ms 18: learn: 0.2579750 total: 22.6ms remaining: 215ms 19: learn: 0.2449128 total: 23.8ms remaining: 214ms 20: learn: 0.2325563 total: 25ms remaining: 213ms 21: learn: 0.2213074 total: 26.2ms remaining: 212ms 22: learn: 0.2105813 total: 27.4ms remaining: 211ms 23: learn: 0.2004724 total: 28.7ms remaining: 211ms 24: learn: 0.1908160 total: 29.9ms remaining: 210ms 25: learn: 0.1808626 total: 31.2ms remaining: 209ms 26: learn: 0.1722519 total: 32.4ms remaining: 208ms 27: learn: 0.1638434 total: 33.7ms remaining: 207ms 28: learn: 0.1554835 total: 34.9ms remaining: 206ms 29: learn: 0.1477724 total: 36.1ms remaining: 205ms 30: learn: 0.1404945 total: 37.3ms remaining: 203ms 31: learn: 0.1335736 total: 38.5ms remaining: 202ms 32: learn: 0.1269453 total: 39.7ms remaining: 201ms 33: learn: 0.1208221 total: 40.9ms remaining: 200ms 34: learn: 0.1153437 total: 42.2ms remaining: 199ms 35: learn: 0.1098383 total: 43.3ms remaining: 197ms 36: learn: 0.1048613 total: 44.7ms remaining: 197ms 37: learn: 0.1001649 total: 45.8ms remaining: 195ms 38: learn: 0.0956594 total: 47ms remaining: 194ms 39: learn: 0.0914333 total: 48.3ms remaining: 193ms 40: learn: 0.0874844 total: 49.5ms remaining: 192ms 41: learn: 0.0836310 total: 50.7ms remaining: 191ms 42: learn: 0.0800624 total: 51.9ms remaining: 190ms 43: learn: 0.0768335 total: 53.2ms remaining: 188ms 44: learn: 0.0734384 total: 54.4ms remaining: 187ms 45: learn: 0.0703208 total: 55.5ms remaining: 186ms 46: learn: 0.0672899 total: 56.7ms remaining: 184ms 47: learn: 0.0643721 total: 57.9ms remaining: 183ms 48: learn: 0.0615388 total: 59.2ms remaining: 182ms 49: learn: 0.0590989 total: 60.4ms remaining: 181ms 50: learn: 0.0567096 total: 61.6ms remaining: 180ms 51: learn: 0.0544305 total: 62.8ms remaining: 179ms 52: learn: 0.0523452 total: 64ms remaining: 177ms 53: learn: 0.0498063 total: 65.1ms remaining: 176ms 54: learn: 0.0476854 total: 66.2ms remaining: 175ms 55: learn: 0.0458659 total: 67.5ms remaining: 174ms 56: learn: 0.0439035 total: 68.8ms remaining: 173ms 57: learn: 0.0421772 total: 70.1ms remaining: 172ms 58: learn: 0.0403579 total: 71.4ms remaining: 171ms 59: learn: 0.0387628 total: 72.6ms remaining: 169ms 60: learn: 0.0371524 total: 73.8ms remaining: 168ms 61: learn: 0.0356815 total: 75.2ms remaining: 167ms 62: learn: 0.0343486 total: 76.5ms remaining: 166ms 63: learn: 0.0329325 total: 77.7ms remaining: 165ms 64: learn: 0.0315918 total: 79ms remaining: 164ms 65: learn: 0.0304244 total: 80.3ms remaining: 163ms 66: learn: 0.0292175 total: 81.6ms remaining: 162ms 67: learn: 0.0282400 total: 82.7ms remaining: 161ms 68: learn: 0.0272602 total: 84ms remaining: 159ms 69: learn: 0.0262688 total: 85.2ms remaining: 158ms 70: learn: 0.0254206 total: 86.3ms remaining: 157ms 71: learn: 0.0245386 total: 87.4ms remaining: 155ms 72: learn: 0.0236300 total: 88.6ms remaining: 154ms 73: learn: 0.0228265 total: 89.8ms remaining: 153ms 74: learn: 0.0221196 total: 91.1ms remaining: 152ms 75: learn: 0.0213787 total: 92.3ms remaining: 151ms 76: learn: 0.0204883 total: 93.4ms remaining: 149ms 77: learn: 0.0198340 total: 94.6ms remaining: 148ms 78: learn: 0.0190185 total: 95.7ms remaining: 147ms 79: learn: 0.0184044 total: 96.9ms remaining: 145ms 80: learn: 0.0178198 total: 98ms remaining: 144ms 81: learn: 0.0172466 total: 99.2ms remaining: 143ms 82: learn: 0.0167424 total: 100ms remaining: 141ms 83: learn: 0.0162306 total: 102ms remaining: 140ms 84: learn: 0.0156537 total: 103ms remaining: 139ms 85: learn: 0.0151420 total: 104ms remaining: 138ms 86: learn: 0.0146110 total: 105ms remaining: 137ms 87: learn: 0.0140455 total: 106ms remaining: 135ms 88: learn: 0.0135373 total: 107ms remaining: 134ms 89: learn: 0.0131767 total: 109ms remaining: 133ms 90: learn: 0.0127442 total: 110ms remaining: 132ms 91: learn: 0.0123519 total: 111ms remaining: 130ms 92: learn: 0.0120279 total: 112ms remaining: 129ms 93: learn: 0.0116475 total: 113ms remaining: 128ms 94: learn: 0.0112956 total: 115ms remaining: 127ms 95: learn: 0.0109911 total: 116ms remaining: 126ms 96: learn: 0.0106855 total: 117ms remaining: 124ms 97: learn: 0.0103866 total: 118ms remaining: 123ms 98: learn: 0.0100137 total: 119ms remaining: 122ms 99: learn: 0.0096872 total: 121ms remaining: 121ms 100: learn: 0.0094532 total: 122ms remaining: 119ms 101: learn: 0.0092026 total: 123ms remaining: 118ms 102: learn: 0.0089828 total: 124ms remaining: 117ms 103: learn: 0.0087387 total: 126ms remaining: 116ms 104: learn: 0.0085131 total: 127ms remaining: 115ms 105: learn: 0.0082750 total: 128ms remaining: 114ms 106: learn: 0.0080488 total: 129ms remaining: 112ms 107: learn: 0.0078430 total: 131ms remaining: 111ms 108: learn: 0.0076567 total: 132ms remaining: 110ms 109: learn: 0.0074510 total: 133ms remaining: 109ms 110: learn: 0.0072476 total: 134ms remaining: 108ms 111: learn: 0.0070855 total: 135ms remaining: 106ms 112: learn: 0.0069208 total: 136ms remaining: 105ms 113: learn: 0.0067316 total: 138ms remaining: 104ms 114: learn: 0.0065632 total: 139ms remaining: 103ms 115: learn: 0.0064088 total: 140ms remaining: 101ms 116: learn: 0.0062443 total: 141ms remaining: 100ms 117: learn: 0.0061037 total: 142ms remaining: 99ms 118: learn: 0.0059522 total: 144ms remaining: 97.7ms 119: learn: 0.0058117 total: 145ms remaining: 96.5ms 120: learn: 0.0056876 total: 146ms remaining: 95.3ms 121: learn: 0.0055594 total: 147ms remaining: 94ms 122: learn: 0.0054362 total: 148ms remaining: 92.8ms 123: learn: 0.0052925 total: 149ms remaining: 91.6ms 124: learn: 0.0051599 total: 151ms remaining: 90.3ms 125: learn: 0.0050524 total: 152ms remaining: 89.1ms 126: learn: 0.0049303 total: 153ms remaining: 88ms 127: learn: 0.0048181 total: 154ms remaining: 86.8ms 128: learn: 0.0047235 total: 156ms remaining: 85.6ms 129: learn: 0.0046187 total: 157ms remaining: 84.4ms 130: learn: 0.0045162 total: 158ms remaining: 83.2ms 131: learn: 0.0044202 total: 159ms remaining: 82.1ms 132: learn: 0.0043386 total: 161ms remaining: 80.9ms 133: learn: 0.0042540 total: 162ms remaining: 79.7ms 134: learn: 0.0041706 total: 163ms remaining: 78.5ms 135: learn: 0.0040881 total: 164ms remaining: 77.3ms 136: learn: 0.0040048 total: 165ms remaining: 76.1ms 137: learn: 0.0039251 total: 167ms remaining: 74.9ms 138: learn: 0.0038349 total: 168ms remaining: 73.7ms 139: learn: 0.0037611 total: 169ms remaining: 72.5ms 140: learn: 0.0036952 total: 170ms remaining: 71.3ms 141: learn: 0.0036313 total: 172ms remaining: 70.1ms 142: learn: 0.0035579 total: 173ms remaining: 68.9ms 143: learn: 0.0034892 total: 174ms remaining: 67.7ms 144: learn: 0.0034148 total: 175ms remaining: 66.5ms 145: learn: 0.0033443 total: 176ms remaining: 65.3ms 146: learn: 0.0032847 total: 178ms remaining: 64ms 147: learn: 0.0032149 total: 179ms remaining: 62.8ms 148: learn: 0.0031560 total: 180ms remaining: 61.6ms 149: learn: 0.0031024 total: 181ms remaining: 60.4ms 150: learn: 0.0030363 total: 182ms remaining: 59.2ms 151: learn: 0.0029826 total: 184ms remaining: 58ms 152: learn: 0.0029267 total: 185ms remaining: 56.8ms 153: learn: 0.0028785 total: 186ms remaining: 55.6ms 154: learn: 0.0028146 total: 187ms remaining: 54.4ms 155: learn: 0.0027677 total: 189ms remaining: 53.2ms 156: learn: 0.0027276 total: 190ms remaining: 52ms 157: learn: 0.0026715 total: 191ms remaining: 50.7ms 158: learn: 0.0026246 total: 192ms remaining: 49.5ms 159: learn: 0.0025827 total: 193ms remaining: 48.3ms 160: learn: 0.0025274 total: 194ms remaining: 47.1ms 161: learn: 0.0024871 total: 196ms remaining: 45.9ms 162: learn: 0.0024466 total: 197ms remaining: 44.7ms 163: learn: 0.0023988 total: 198ms remaining: 43.4ms 164: learn: 0.0023560 total: 199ms remaining: 42.2ms 165: learn: 0.0023219 total: 200ms remaining: 41ms 166: learn: 0.0022850 total: 201ms remaining: 39.8ms 167: learn: 0.0022475 total: 203ms remaining: 38.6ms 168: learn: 0.0022118 total: 204ms remaining: 37.4ms 169: learn: 0.0021777 total: 205ms remaining: 36.2ms 170: learn: 0.0021421 total: 206ms remaining: 35ms 171: learn: 0.0021023 total: 208ms remaining: 33.8ms 172: learn: 0.0020732 total: 209ms remaining: 32.6ms 173: learn: 0.0020376 total: 210ms remaining: 31.4ms 174: learn: 0.0020070 total: 212ms remaining: 30.2ms 175: learn: 0.0019800 total: 213ms remaining: 29ms 176: learn: 0.0019505 total: 214ms remaining: 27.8ms 177: learn: 0.0019204 total: 215ms remaining: 26.6ms 178: learn: 0.0018917 total: 217ms remaining: 25.4ms 179: learn: 0.0018650 total: 218ms remaining: 24.2ms 180: learn: 0.0018381 total: 219ms remaining: 23ms 181: learn: 0.0018168 total: 221ms remaining: 21.8ms 182: learn: 0.0017910 total: 222ms remaining: 20.6ms 183: learn: 0.0017680 total: 223ms remaining: 19.4ms 184: learn: 0.0017472 total: 224ms remaining: 18.2ms 185: learn: 0.0017226 total: 226ms remaining: 17ms 186: learn: 0.0016991 total: 227ms remaining: 15.8ms 187: learn: 0.0016752 total: 228ms remaining: 14.6ms 188: learn: 0.0016481 total: 230ms remaining: 13.4ms 189: learn: 0.0016252 total: 231ms remaining: 12.2ms 190: learn: 0.0016045 total: 232ms remaining: 10.9ms 191: learn: 0.0015870 total: 234ms remaining: 9.74ms 192: learn: 0.0015656 total: 235ms remaining: 8.52ms 193: learn: 0.0015454 total: 236ms remaining: 7.31ms 194: learn: 0.0015228 total: 237ms remaining: 6.09ms 195: learn: 0.0015058 total: 239ms remaining: 4.87ms 196: learn: 0.0014873 total: 240ms remaining: 3.65ms 197: learn: 0.0014695 total: 241ms remaining: 2.44ms 198: learn: 0.0014515 total: 243ms remaining: 1.22ms 199: learn: 0.0014340 total: 244ms remaining: 0us
{'depth': 3, 'iterations': 200, 'learning_rate': 0.01}
catb_cv_model.best_params_
{'depth': 3, 'iterations': 200, 'learning_rate': 0.01}
catb = CatBoostClassifier(iterations = 200,
learning_rate = 0.01,
depth = 8)
catb_tuned = catb.fit(X_train, y_train)
y_pred = catb_tuned.predict(X_test)
0: learn: 0.6674337 total: 3.68ms remaining: 732ms 1: learn: 0.6386795 total: 5.65ms remaining: 560ms 2: learn: 0.6116826 total: 8.82ms remaining: 579ms 3: learn: 0.5815681 total: 10.3ms remaining: 506ms 4: learn: 0.5605616 total: 15.4ms remaining: 602ms 5: learn: 0.5327449 total: 21.4ms remaining: 693ms 6: learn: 0.5095491 total: 25.3ms remaining: 696ms 7: learn: 0.4884327 total: 30ms remaining: 719ms 8: learn: 0.4610596 total: 31.4ms remaining: 666ms 9: learn: 0.4432132 total: 36.2ms remaining: 687ms 10: learn: 0.4201282 total: 37.7ms remaining: 648ms 11: learn: 0.4051588 total: 41.2ms remaining: 645ms 12: learn: 0.3856329 total: 43.3ms remaining: 622ms 13: learn: 0.3648093 total: 44.8ms remaining: 595ms 14: learn: 0.3500280 total: 48.8ms remaining: 602ms 15: learn: 0.3348621 total: 51.2ms remaining: 589ms 16: learn: 0.3203382 total: 54.4ms remaining: 586ms 17: learn: 0.3073090 total: 57.8ms remaining: 584ms 18: learn: 0.2958863 total: 61.2ms remaining: 583ms 19: learn: 0.2840642 total: 64.4ms remaining: 580ms 20: learn: 0.2740001 total: 67.6ms remaining: 577ms 21: learn: 0.2631915 total: 71ms remaining: 575ms 22: learn: 0.2533232 total: 74.4ms remaining: 573ms 23: learn: 0.2441702 total: 77.9ms remaining: 571ms 24: learn: 0.2356840 total: 81.1ms remaining: 568ms 25: learn: 0.2273163 total: 84.5ms remaining: 565ms 26: learn: 0.2181152 total: 87.6ms remaining: 561ms 27: learn: 0.2112030 total: 90.7ms remaining: 557ms 28: learn: 0.2040427 total: 94ms remaining: 554ms 29: learn: 0.1940757 total: 95.4ms remaining: 541ms 30: learn: 0.1867837 total: 98.6ms remaining: 537ms 31: learn: 0.1809468 total: 102ms remaining: 534ms 32: learn: 0.1749376 total: 105ms remaining: 530ms 33: learn: 0.1693432 total: 108ms remaining: 527ms 34: learn: 0.1625870 total: 110ms remaining: 518ms 35: learn: 0.1573514 total: 113ms remaining: 514ms 36: learn: 0.1520892 total: 116ms remaining: 511ms 37: learn: 0.1450636 total: 117ms remaining: 500ms 38: learn: 0.1409237 total: 121ms remaining: 497ms 39: learn: 0.1357986 total: 123ms remaining: 493ms 40: learn: 0.1312019 total: 126ms remaining: 490ms 41: learn: 0.1263063 total: 129ms remaining: 487ms 42: learn: 0.1219785 total: 133ms remaining: 484ms 43: learn: 0.1175381 total: 134ms remaining: 477ms 44: learn: 0.1112586 total: 136ms remaining: 467ms 45: learn: 0.1061615 total: 137ms remaining: 458ms 46: learn: 0.1034262 total: 140ms remaining: 455ms 47: learn: 0.0993954 total: 143ms remaining: 452ms 48: learn: 0.0955029 total: 144ms remaining: 445ms 49: learn: 0.0925824 total: 148ms remaining: 443ms 50: learn: 0.0905601 total: 151ms remaining: 440ms 51: learn: 0.0874680 total: 153ms remaining: 435ms 52: learn: 0.0853901 total: 156ms remaining: 433ms 53: learn: 0.0819556 total: 158ms remaining: 427ms 54: learn: 0.0787584 total: 161ms remaining: 424ms 55: learn: 0.0766212 total: 164ms remaining: 422ms 56: learn: 0.0742543 total: 167ms remaining: 419ms 57: learn: 0.0720224 total: 170ms remaining: 416ms 58: learn: 0.0697375 total: 173ms remaining: 414ms 59: learn: 0.0677118 total: 176ms remaining: 411ms 60: learn: 0.0660644 total: 179ms remaining: 409ms 61: learn: 0.0639365 total: 182ms remaining: 406ms 62: learn: 0.0623641 total: 186ms remaining: 404ms 63: learn: 0.0608367 total: 189ms remaining: 401ms 64: learn: 0.0588202 total: 192ms remaining: 398ms 65: learn: 0.0574737 total: 195ms remaining: 396ms 66: learn: 0.0563396 total: 198ms remaining: 393ms 67: learn: 0.0548423 total: 201ms remaining: 390ms 68: learn: 0.0532666 total: 203ms remaining: 386ms 69: learn: 0.0515519 total: 207ms remaining: 384ms 70: learn: 0.0502591 total: 210ms remaining: 381ms 71: learn: 0.0490098 total: 213ms remaining: 379ms 72: learn: 0.0467149 total: 214ms remaining: 373ms 73: learn: 0.0457303 total: 218ms remaining: 371ms 74: learn: 0.0445577 total: 221ms remaining: 368ms 75: learn: 0.0432739 total: 224ms remaining: 366ms 76: learn: 0.0423363 total: 228ms remaining: 364ms 77: learn: 0.0415304 total: 231ms remaining: 361ms 78: learn: 0.0404405 total: 234ms remaining: 358ms 79: learn: 0.0393911 total: 237ms remaining: 355ms 80: learn: 0.0384185 total: 240ms remaining: 352ms 81: learn: 0.0372283 total: 243ms remaining: 350ms 82: learn: 0.0365177 total: 246ms remaining: 347ms 83: learn: 0.0355646 total: 249ms remaining: 344ms 84: learn: 0.0347002 total: 252ms remaining: 341ms 85: learn: 0.0337786 total: 255ms remaining: 338ms 86: learn: 0.0329459 total: 258ms remaining: 335ms 87: learn: 0.0318274 total: 260ms remaining: 331ms 88: learn: 0.0311323 total: 263ms remaining: 328ms 89: learn: 0.0303998 total: 266ms remaining: 325ms 90: learn: 0.0297805 total: 269ms remaining: 322ms 91: learn: 0.0289198 total: 271ms remaining: 318ms 92: learn: 0.0282585 total: 274ms remaining: 315ms 93: learn: 0.0276928 total: 277ms remaining: 312ms 94: learn: 0.0271543 total: 280ms remaining: 310ms 95: learn: 0.0262780 total: 282ms remaining: 305ms 96: learn: 0.0256569 total: 284ms remaining: 302ms 97: learn: 0.0250403 total: 288ms remaining: 299ms 98: learn: 0.0244318 total: 291ms remaining: 296ms 99: learn: 0.0238450 total: 293ms remaining: 293ms 100: learn: 0.0232284 total: 296ms remaining: 290ms 101: learn: 0.0227745 total: 299ms remaining: 287ms 102: learn: 0.0224504 total: 302ms remaining: 284ms 103: learn: 0.0219963 total: 305ms remaining: 282ms 104: learn: 0.0216729 total: 308ms remaining: 279ms 105: learn: 0.0211069 total: 310ms remaining: 275ms 106: learn: 0.0207598 total: 313ms remaining: 272ms 107: learn: 0.0204186 total: 316ms remaining: 269ms 108: learn: 0.0200790 total: 319ms remaining: 266ms 109: learn: 0.0197391 total: 322ms remaining: 264ms 110: learn: 0.0194175 total: 326ms remaining: 261ms 111: learn: 0.0191088 total: 329ms remaining: 258ms 112: learn: 0.0184258 total: 330ms remaining: 254ms 113: learn: 0.0181038 total: 334ms remaining: 252ms 114: learn: 0.0177560 total: 337ms remaining: 249ms 115: learn: 0.0174523 total: 340ms remaining: 246ms 116: learn: 0.0170665 total: 344ms remaining: 244ms 117: learn: 0.0167080 total: 347ms remaining: 241ms 118: learn: 0.0164368 total: 350ms remaining: 238ms 119: learn: 0.0161758 total: 353ms remaining: 236ms 120: learn: 0.0159163 total: 357ms remaining: 233ms 121: learn: 0.0156155 total: 360ms remaining: 230ms 122: learn: 0.0153268 total: 363ms remaining: 227ms 123: learn: 0.0148085 total: 364ms remaining: 223ms 124: learn: 0.0146222 total: 367ms remaining: 220ms 125: learn: 0.0143929 total: 371ms remaining: 218ms 126: learn: 0.0141229 total: 374ms remaining: 215ms 127: learn: 0.0139366 total: 377ms remaining: 212ms 128: learn: 0.0137235 total: 380ms remaining: 209ms 129: learn: 0.0135058 total: 384ms remaining: 207ms 130: learn: 0.0132373 total: 387ms remaining: 204ms 131: learn: 0.0130216 total: 390ms remaining: 201ms 132: learn: 0.0128189 total: 393ms remaining: 198ms 133: learn: 0.0126473 total: 397ms remaining: 195ms 134: learn: 0.0124232 total: 400ms remaining: 193ms 135: learn: 0.0121647 total: 404ms remaining: 190ms 136: learn: 0.0120279 total: 407ms remaining: 187ms 137: learn: 0.0117731 total: 410ms remaining: 184ms 138: learn: 0.0115865 total: 413ms remaining: 181ms 139: learn: 0.0113978 total: 417ms remaining: 179ms 140: learn: 0.0112161 total: 420ms remaining: 176ms 141: learn: 0.0110848 total: 424ms remaining: 173ms 142: learn: 0.0109318 total: 427ms remaining: 170ms 143: learn: 0.0106073 total: 429ms remaining: 167ms 144: learn: 0.0102736 total: 430ms remaining: 163ms 145: learn: 0.0101610 total: 433ms remaining: 160ms 146: learn: 0.0099851 total: 437ms remaining: 157ms 147: learn: 0.0098476 total: 440ms remaining: 155ms 148: learn: 0.0097119 total: 443ms remaining: 152ms 149: learn: 0.0094119 total: 445ms remaining: 148ms 150: learn: 0.0092983 total: 448ms remaining: 145ms 151: learn: 0.0091583 total: 451ms remaining: 142ms 152: learn: 0.0090601 total: 454ms remaining: 139ms 153: learn: 0.0089563 total: 457ms remaining: 137ms 154: learn: 0.0088347 total: 460ms remaining: 134ms 155: learn: 0.0087051 total: 463ms remaining: 131ms 156: learn: 0.0086108 total: 466ms remaining: 128ms 157: learn: 0.0084889 total: 469ms remaining: 125ms 158: learn: 0.0083861 total: 472ms remaining: 122ms 159: learn: 0.0082546 total: 475ms remaining: 119ms 160: learn: 0.0081076 total: 477ms remaining: 115ms 161: learn: 0.0079875 total: 480ms remaining: 113ms 162: learn: 0.0078931 total: 483ms remaining: 110ms 163: learn: 0.0077882 total: 486ms remaining: 107ms 164: learn: 0.0077308 total: 489ms remaining: 104ms 165: learn: 0.0076229 total: 492ms remaining: 101ms 166: learn: 0.0075257 total: 495ms remaining: 97.8ms 167: learn: 0.0074029 total: 498ms remaining: 94.9ms 168: learn: 0.0072770 total: 500ms remaining: 91.6ms 169: learn: 0.0071588 total: 503ms remaining: 88.7ms 170: learn: 0.0070895 total: 506ms remaining: 85.8ms 171: learn: 0.0070303 total: 509ms remaining: 82.9ms 172: learn: 0.0069656 total: 512ms remaining: 79.9ms 173: learn: 0.0068813 total: 515ms remaining: 77ms 174: learn: 0.0068259 total: 518ms remaining: 74ms 175: learn: 0.0067628 total: 521ms remaining: 71.1ms 176: learn: 0.0066664 total: 524ms remaining: 68.1ms 177: learn: 0.0065542 total: 527ms remaining: 65.1ms 178: learn: 0.0065009 total: 530ms remaining: 62.1ms 179: learn: 0.0064268 total: 533ms remaining: 59.2ms 180: learn: 0.0063803 total: 536ms remaining: 56.2ms 181: learn: 0.0062920 total: 539ms remaining: 53.3ms 182: learn: 0.0062361 total: 542ms remaining: 50.3ms 183: learn: 0.0061654 total: 545ms remaining: 47.4ms 184: learn: 0.0060935 total: 548ms remaining: 44.4ms 185: learn: 0.0060179 total: 551ms remaining: 41.5ms 186: learn: 0.0059646 total: 554ms remaining: 38.5ms 187: learn: 0.0058999 total: 557ms remaining: 35.6ms 188: learn: 0.0058299 total: 560ms remaining: 32.6ms 189: learn: 0.0057614 total: 563ms remaining: 29.7ms 190: learn: 0.0056890 total: 566ms remaining: 26.7ms 191: learn: 0.0056322 total: 569ms remaining: 23.7ms 192: learn: 0.0055756 total: 573ms remaining: 20.8ms 193: learn: 0.0055162 total: 576ms remaining: 17.8ms 194: learn: 0.0054634 total: 579ms remaining: 14.8ms 195: learn: 0.0054088 total: 582ms remaining: 11.9ms 196: learn: 0.0053461 total: 585ms remaining: 8.91ms 197: learn: 0.0053016 total: 588ms remaining: 5.94ms 198: learn: 0.0052433 total: 591ms remaining: 2.97ms 199: learn: 0.0051680 total: 595ms remaining: 0us
y_pred = catb_tuned.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
Tüm Modellerin Karşılaştırılması¶
modeller = [
knn_tuned,
loj_model,
nb_model,
mlpc_tuned,
cart_tuned,
rf_tuned,
gbm_tuned,
catb_tuned,
lgbm_tuned,
xgb_tuned
]
for model in modeller:
if(model!=mlpc_tuned):
y_pred = model.predict(X_test)
else:
y_pred = model.predict(X_test_scaled)
dogruluk = accuracy_score(y_test, y_pred)
isimler = model.__class__.__name__
print("-"*30)
print(isimler + ":" )
print("Accuracy: {:.4%}".format(dogruluk))
------------------------------ KNeighborsClassifier: Accuracy: 61.0461% ------------------------------ LogisticRegression: Accuracy: 62.6854% ------------------------------ GaussianNB: Accuracy: 76.8150% ------------------------------ MLPClassifier: Accuracy: 100.0000% ------------------------------ DecisionTreeClassifier: Accuracy: 100.0000% ------------------------------ RandomForestClassifier: Accuracy: 99.7658% ------------------------------ GradientBoostingClassifier: Accuracy: 100.0000% ------------------------------ CatBoostClassifier: Accuracy: 100.0000% ------------------------------ LGBMClassifier: Accuracy: 100.0000% ------------------------------ XGBClassifier: Accuracy: 100.0000%
sonuclar = pd.DataFrame(columns=["modeller", "Accuracy"])
for model in modeller:
if model != mlpc_tuned:
y_pred = model.predict(X_test)
else:
y_pred = model.predict(X_test_scaled)
dogruluk = accuracy_score(y_test, y_pred)
isimler = model.__class__.__name__
sonuc = pd.DataFrame([[isimler, dogruluk * 100]], columns=["modeller", "Accuracy"])
sonuclar = pd.concat([sonuclar, sonuc], ignore_index=True)
sns.barplot(x='Accuracy', y='modeller', data=sonuclar, color="r")
plt.xlabel('Accuracy %')
plt.title('Modellerin Doğruluk Oranları')
plt.show()
Modellerin Kaydedilmesi ve Tekrar Kullanılması¶
# Save Model Using Pickle
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle
#uzak sunucudan datanin cekilmesi
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
test_size = 0.33
seed = 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=test_size, random_state=seed)
# Fit the model on training set
model = LogisticRegression()
model.fit(X_train, Y_train)
# save the model to disk
filename = 'data/finalized_model.sav'
pickle.dump(model, open(filename, 'wb'))
# load the model from disk
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(X_test, Y_test)
print(result)